Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h +++ include/llvm/Analysis/ScalarEvolution.h @@ -548,8 +548,9 @@ /// pair of exact and max expressions that are eventually summarized in /// ExitNotTakenInfo and BackedgeTakenInfo. struct ExitLimit { - const SCEV *ExactNotTaken; - const SCEV *MaxNotTaken; + const SCEV *ExactNotTaken; //< The exit is not taken exactly this many times + const SCEV *MaxNotTaken; //< The exit is not taken at most this many times + bool MaxOrZero; //< Not taken either exactly MaxNotTaken or zero times /// A set of predicate guards for this ExitLimit. The result is only valid /// if all of the predicates in \c Predicates evaluate to 'true' at @@ -561,12 +562,13 @@ Predicates.insert(P); } - /*implicit*/ ExitLimit(const SCEV *E) : ExactNotTaken(E), MaxNotTaken(E) {} + /*implicit*/ ExitLimit(const SCEV *E) + : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) {} ExitLimit( - const SCEV *E, const SCEV *M, + const SCEV *E, const SCEV *M, bool MaxOrZero, ArrayRef *> PredSetList) - : ExactNotTaken(E), MaxNotTaken(M) { + : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) { assert((isa(ExactNotTaken) || !isa(MaxNotTaken)) && "Exact is not allowed to be less precise than Max"); @@ -575,11 +577,12 @@ addPredicate(P); } - ExitLimit(const SCEV *E, const SCEV *M, + ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero, const SmallPtrSetImpl &PredSet) - : ExitLimit(E, M, {&PredSet}) {} + : ExitLimit(E, M, MaxOrZero, {&PredSet}) {} - ExitLimit(const SCEV *E, const SCEV *M) : ExitLimit(E, M, None) {} + ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero) + : ExitLimit(E, M, MaxOrZero, None) {} /// Test whether this ExitLimit contains any computed information, or /// whether it's all SCEVCouldNotCompute values. @@ -644,6 +647,9 @@ /// ExitNotTaken has an element for every exiting block in the loop. PointerIntPair MaxAndComplete; + /// True iff the backedge is taken either exactly Max or zero times. + bool MaxOrZero; + /// \name Helper projection functions on \c MaxAndComplete. /// @{ bool isComplete() const { return MaxAndComplete.getInt(); } @@ -658,11 +664,13 @@ BackedgeTakenInfo(BackedgeTakenInfo &&Other) { ExitNotTaken = std::move(Other.ExitNotTaken); MaxAndComplete = std::move(Other.MaxAndComplete); + MaxOrZero = std::move(Other.MaxOrZero); } BackedgeTakenInfo &operator=(BackedgeTakenInfo &&Other) { ExitNotTaken = std::move(Other.ExitNotTaken); MaxAndComplete = std::move(Other.MaxAndComplete); + MaxOrZero = std::move(Other.MaxOrZero); return *this; } @@ -670,7 +678,7 @@ /// Initialize BackedgeTakenInfo from a list of exact exit counts. BackedgeTakenInfo(SmallVectorImpl &&ExitCounts, bool Complete, - const SCEV *MaxCount); + const SCEV *MaxCount, bool MaxOrZero); /// Test whether this BackedgeTakenInfo contains any computed information, /// or whether it's all SCEVCouldNotCompute values. @@ -706,8 +714,9 @@ /// block. const SCEV *getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const; - /// Get the max backedge taken count for the loop. - const SCEV *getMax(ScalarEvolution *SE) const; + /// Get the max backedge taken count for the loop, and set *MaxOrZero iff + /// the actual backedge taken count is either this max or zero. + const SCEV *getMax(ScalarEvolution *SE, bool *MaxOrZero = nullptr) const; /// Return true if any backedge taken count expressions refer to the given /// subexpression. @@ -1333,7 +1342,9 @@ /// Returns the upper bound of the loop trip count as a normal unsigned /// value. /// Returns 0 if the trip count is unknown or not constant. - unsigned getSmallConstantMaxTripCount(Loop *L); + /// Sets *MaxOrZero to true iff the actual backedge taken count is either the + /// returned value or zero. + unsigned getSmallConstantMaxTripCount(Loop *L, bool *MaxOrZero = nullptr); /// Returns the largest constant divisor of the trip count of the /// loop if it is a single-exit loop and we can compute a small maximum for @@ -1378,7 +1389,10 @@ /// Similar to getBackedgeTakenCount, except return the least SCEV value /// that is known never to be less than the actual backedge taken count. - const SCEV *getMaxBackedgeTakenCount(const Loop *L); + /// Sets *MaxOrZero to true iff the actual backedge taken count is either the + /// returned value or zero. + const SCEV *getMaxBackedgeTakenCount(const Loop *L, + bool *MaxOrZero = nullptr); /// Return true if the specified loop has an analyzable loop-invariant /// backedge-taken count. Index: include/llvm/Transforms/Utils/UnrollLoop.h =================================================================== --- include/llvm/Transforms/Utils/UnrollLoop.h +++ include/llvm/Transforms/Utils/UnrollLoop.h @@ -32,8 +32,9 @@ bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, - bool UseUpperBound, unsigned TripMultiple, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + bool PreserveCondBr, bool PreserveOnlyFirst, + unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE, + DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA); bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -5324,9 +5324,10 @@ return getConstantTripCount(ExitCount); } -unsigned ScalarEvolution::getSmallConstantMaxTripCount(Loop *L) { +unsigned ScalarEvolution::getSmallConstantMaxTripCount(Loop *L, + bool *MaxOrZero) { const auto *MaxExitCount = - dyn_cast(getMaxBackedgeTakenCount(L)); + dyn_cast(getMaxBackedgeTakenCount(L, MaxOrZero)); return getConstantTripCount(MaxExitCount); } @@ -5402,8 +5403,9 @@ /// Similar to getBackedgeTakenCount, except return the least SCEV value that is /// known never to be less than the actual backedge taken count. -const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { - return getBackedgeTakenInfo(L).getMax(this); +const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L, + bool *MaxOrZero) { + return getBackedgeTakenInfo(L).getMax(this, MaxOrZero); } /// Push PHI nodes in the header of the given loop onto the given Worklist. @@ -5627,15 +5629,20 @@ /// getMax - Get the max backedge taken count for the loop. const SCEV * -ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { +ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE, + bool *MaxOrZero) const { auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) { return !ENT.hasAlwaysTruePredicate(); }; - if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax()) - return SE->getCouldNotCompute(); + const SCEV *Result = getMax(); + if (!Result || any_of(ExitNotTaken, PredicateNotAlwaysTrue)) + Result = SE->getCouldNotCompute(); + + if (MaxOrZero) + *MaxOrZero = (Result != SE->getCouldNotCompute() && this->MaxOrZero); - return getMax(); + return Result; } bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, @@ -5657,8 +5664,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( SmallVectorImpl &&ExitCounts, - bool Complete, const SCEV *MaxCount) - : MaxAndComplete(MaxCount, Complete) { + bool Complete, const SCEV *MaxCount, bool MaxOrZero) + : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) { typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo; ExitNotTaken.reserve(ExitCounts.size()); std::transform( @@ -5696,6 +5703,7 @@ BasicBlock *Latch = L->getLoopLatch(); // may be NULL. const SCEV *MustExitMaxBECount = nullptr; const SCEV *MayExitMaxBECount = nullptr; + bool MustExitMaxOrZero = false; // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts // and compute maxBECount. @@ -5728,9 +5736,10 @@ // computable EL.MaxNotTaken. if (EL.MaxNotTaken != getCouldNotCompute() && Latch && DT.dominates(ExitBB, Latch)) { - if (!MustExitMaxBECount) + if (!MustExitMaxBECount) { MustExitMaxBECount = EL.MaxNotTaken; - else { + MustExitMaxOrZero = EL.MaxOrZero; + } else { MustExitMaxBECount = getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken); } @@ -5745,8 +5754,11 @@ } const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount : (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute()); + // The loop backedge will be taken the maximum or zero times if there's + // a single exit that must be taken the maximum or zero times. + bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1); return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount, - MaxBECount); + MaxBECount, MaxOrZero); } ScalarEvolution::ExitLimit @@ -5883,7 +5895,8 @@ !isa(BECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount, {&EL0.Predicates, &EL1.Predicates}); + return ExitLimit(BECount, MaxBECount, false, + {&EL0.Predicates, &EL1.Predicates}); } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. @@ -5922,7 +5935,8 @@ BECount = EL0.ExactNotTaken; } - return ExitLimit(BECount, MaxBECount, {&EL0.Predicates, &EL1.Predicates}); + return ExitLimit(BECount, MaxBECount, false, + {&EL0.Predicates, &EL1.Predicates}); } } @@ -6307,7 +6321,7 @@ unsigned BitWidth = getTypeSizeInBits(RHS->getType()); const SCEV *UpperBound = getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth); - return ExitLimit(getCouldNotCompute(), UpperBound); + return ExitLimit(getCouldNotCompute(), UpperBound, false); } return getCouldNotCompute(); @@ -7103,7 +7117,8 @@ // should not accept a root of 2. const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); if (Val->isZero()) - return ExitLimit(R1, R1, Predicates); // We found a quadratic root! + // We found a quadratic root! + return ExitLimit(R1, R1, false, Predicates); } } return getCouldNotCompute(); @@ -7160,7 +7175,7 @@ else MaxBECount = getConstant(CountDown ? CR.getUnsignedMax() : -CR.getUnsignedMin()); - return ExitLimit(Distance, MaxBECount, Predicates); + return ExitLimit(Distance, MaxBECount, false, Predicates); } // As a special case, handle the instance where Step is a positive power of @@ -7215,7 +7230,7 @@ const SCEV *Limit = getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); - return ExitLimit(Limit, Limit, Predicates); + return ExitLimit(Limit, Limit, false, Predicates); } } @@ -7228,14 +7243,14 @@ loopHasNoAbnormalExits(AddRec->getLoop())) { const SCEV *Exact = getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - return ExitLimit(Exact, Exact, Predicates); + return ExitLimit(Exact, Exact, false, Predicates); } // Then, try to solve the above equation provided that Start is constant. if (const SCEVConstant *StartC = dyn_cast(Start)) { const SCEV *E = SolveLinEquationWithOverflow( StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this); - return ExitLimit(E, E, Predicates); + return ExitLimit(E, E, false, Predicates); } return getCouldNotCompute(); } @@ -8677,14 +8692,16 @@ } const SCEV *MaxBECount; + bool MaxOrZero = false; if (isa(BECount)) MaxBECount = BECount; - else if (isa(BECountIfBackedgeTaken)) + else if (isa(BECountIfBackedgeTaken)) { // If we know exactly how many times the backedge will be taken if it's // taken at least once, then the backedge count will either be that or // zero. MaxBECount = BECountIfBackedgeTaken; - else { + MaxOrZero = true; + } else { // Calculate the maximum backedge count based on the range of values // permitted by Start, End, and Stride. APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin() @@ -8721,7 +8738,7 @@ if (isa(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount, Predicates); + return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates); } ScalarEvolution::ExitLimit @@ -8798,7 +8815,7 @@ if (isa(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount, Predicates); + return ExitLimit(BECount, MaxBECount, false, Predicates); } const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, @@ -9577,8 +9594,11 @@ L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; - if (!isa(SE->getMaxBackedgeTakenCount(L))) { + bool MaxOrZero = false; + if (!isa(SE->getMaxBackedgeTakenCount(L, &MaxOrZero))) { OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); + if (MaxOrZero) + OS << ", actual taken count either this or zero."; } else { OS << "Unpredictable max backedge-taken count. "; } Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1000,14 +1000,21 @@ if (Convergent) UP.AllowRemainder = false; - // Try to find the trip count upper bound if it is allowed and we cannot find - // exact trip count. - if (UP.UpperBound) { - if (!TripCount) { - MaxTripCount = SE->getSmallConstantMaxTripCount(L); - // Only unroll with small upper bound. - if (MaxTripCount > UnrollMaxUpperBound) - MaxTripCount = 0; + // Try to find the trip count upper bound if we cannot find the exact trip + // count. + bool MaxOrZero = false; + if (!TripCount) { + MaxTripCount = SE->getSmallConstantMaxTripCount(L, &MaxOrZero); + // We can unroll by the upper bound amount if it's generally allowed or if + // we know that the loop is executed either the upper bound or zero times. + // (MaxOrZero unrolling keeps only the first loop test, so the number of + // loop tests remains the same compared to the non-unrolled version, whereas + // the generic upper bound unrolling keeps all but the last loop test so the + // number of loop tests goes up which may end up being worse on targets with + // constriained branch predictor resources so is controlled by an option.) + // In addition we only unroll small upper bounds. + if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) { + MaxTripCount = 0; } } @@ -1025,8 +1032,8 @@ // Unroll the loop. if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime, - UP.AllowExpensiveTripCount, UseUpperBound, TripMultiple, LI, - SE, &DT, &AC, &ORE, PreserveLCSSA)) + UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero, + TripMultiple, LI, SE, &DT, &AC, &ORE, PreserveLCSSA)) return false; // If loop has an unroll count pragma or unrolled by explicitly set count Index: lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- lib/Transforms/Utils/LoopUnroll.cpp +++ lib/Transforms/Utils/LoopUnroll.cpp @@ -189,7 +189,8 @@ /// /// PreserveCondBr indicates whether the conditional branch of the LatchBlock /// needs to be preserved. It is needed when we use trip count upper bound to -/// fully unroll the loop. +/// fully unroll the loop. If PreserveOnlyFirst is also set then only the first +/// conditional branch needs to be preserved. /// /// Similarly, TripMultiple divides the number of times that the LatchBlock may /// execute without exiting the loop. @@ -207,10 +208,10 @@ /// DominatorTree if they are non-null. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, - bool PreserveCondBr, unsigned TripMultiple, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, OptimizationRemarkEmitter *ORE, - bool PreserveLCSSA) { + bool PreserveCondBr, bool PreserveOnlyFirst, + unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE, + DominatorTree *DT, AssumptionCache *AC, + OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -550,7 +551,7 @@ assert(NeedConditional && "NeedCondition cannot be modified by both complete " "unrolling and runtime unrolling"); - NeedConditional = (PreserveCondBr && j); + NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst && i != 0)); } else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { // If we know the trip count or a multiple of it, we can safely use an // unconditional branch for some iterations. Index: test/Analysis/ScalarEvolution/trip-count13.ll =================================================================== --- test/Analysis/ScalarEvolution/trip-count13.ll +++ test/Analysis/ScalarEvolution/trip-count13.ll @@ -14,7 +14,7 @@ ; CHECK-LABEL: Determining loop execution counts for: @u_0 ; CHECK-NEXT: Loop %loop: backedge-taken count is (-100 + (-1 * %rhs) + ((100 + %rhs) umax %rhs)) -; CHECK-NEXT: Loop %loop: max backedge-taken count is -100 +; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero. leave: ret void @@ -34,7 +34,7 @@ ; CHECK-LABEL: Determining loop execution counts for: @u_1 ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * %start) + ((-100 + %start) umax %start)) -; CHECK-NEXT: Loop %loop: max backedge-taken count is -100 +; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero. leave: ret void @@ -54,7 +54,7 @@ ; CHECK-LABEL: Determining loop execution counts for: @s_0 ; CHECK-NEXT: Loop %loop: backedge-taken count is (-100 + (-1 * %rhs) + ((100 + %rhs) smax %rhs)) -; CHECK-NEXT: Loop %loop: max backedge-taken count is -100 +; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero. leave: ret void @@ -74,7 +74,7 @@ ; CHECK-LABEL: Determining loop execution counts for: @s_1 ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * %start) + ((-100 + %start) smax %start)) -; CHECK-NEXT: Loop %loop: max backedge-taken count is -100 +; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero. leave: ret void Index: test/Analysis/ScalarEvolution/trip-count14.ll =================================================================== --- test/Analysis/ScalarEvolution/trip-count14.ll +++ test/Analysis/ScalarEvolution/trip-count14.ll @@ -15,7 +15,7 @@ ; CHECK-LABEL: Determining loop execution counts for: @s32_max1 ; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((1 + %n) smax %n)) -; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1 +; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1, actual taken count either this or zero. do.end: ret void @@ -36,7 +36,7 @@ ; CHECK-LABEL: Determining loop execution counts for: @s32_max2 ; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((2 + %n) smax %n)) -; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2 +; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2, actual taken count either this or zero. do.end: ret void @@ -57,7 +57,7 @@ ; CHECK-LABEL: Determining loop execution counts for: @s32_maxx ; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((%n + %x) smax %n)) -; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1 +; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1{{$}} do.end: ret void @@ -82,7 +82,7 @@ ; CHECK-LABEL: Determining loop execution counts for: @s32_max2_unpredictable_exit ; CHECK-NEXT: Loop %do.body: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2 +; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}} do.end: ret void @@ -103,7 +103,7 @@ ; CHECK-LABEL: Determining loop execution counts for: @u32_max1 ; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((1 + %n) umax %n)) -; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1 +; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1, actual taken count either this or zero. do.end: ret void @@ -124,7 +124,7 @@ ; CHECK-LABEL: Determining loop execution counts for: @u32_max2 ; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((2 + %n) umax %n)) -; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2 +; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2, actual taken count either this or zero. do.end: ret void @@ -145,7 +145,7 @@ ; CHECK-LABEL: Determining loop execution counts for: @u32_maxx ; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((%n + %x) umax %n)) -; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1 +; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1{{$}} do.end: ret void @@ -170,7 +170,7 @@ ; CHECK-LABEL: Determining loop execution counts for: @u32_max2_unpredictable_exit ; CHECK-NEXT: Loop %do.body: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2 +; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}} do.end: ret void Index: test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll =================================================================== --- /dev/null +++ test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll @@ -0,0 +1,207 @@ +; RUN: opt -S -loop-unroll < %s | FileCheck %s + +; Unroll twice, with first loop exit kept +; CHECK-LABEL: @s32_max1 +; CHECK: do.body: +; CHECK: store +; CHECK: br i1 %cmp, label %do.body.1, label %do.end +; CHECK: do.end: +; CHECK: ret void +; CHECK: do.body.1: +; CHECK: store +; CHECK: br label %do.end +define void @s32_max1(i32 %n, i32* %p) { +entry: + %add = add i32 %n, 1 + br label %do.body + +do.body: + %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] + %arrayidx = getelementptr i32, i32* %p, i32 %i.0 + store i32 %i.0, i32* %arrayidx, align 4 + %inc = add i32 %i.0, 1 + %cmp = icmp slt i32 %i.0, %add + br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times + +do.end: + ret void +} + +; Unroll thrice, with first loop exit kept +; CHECK-LABEL: @s32_max2 +; CHECK: do.body: +; CHECK: store +; CHECK: br i1 %cmp, label %do.body.1, label %do.end +; CHECK: do.end: +; CHECK: ret void +; CHECK: do.body.1: +; CHECK: store +; CHECK: store +; CHECK: br label %do.end +define void @s32_max2(i32 %n, i32* %p) { +entry: + %add = add i32 %n, 2 + br label %do.body + +do.body: + %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] + %arrayidx = getelementptr i32, i32* %p, i32 %i.0 + store i32 %i.0, i32* %arrayidx, align 4 + %inc = add i32 %i.0, 1 + %cmp = icmp slt i32 %i.0, %add + br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times + +do.end: + ret void +} + +; Should not be unrolled +; CHECK-LABEL: @s32_maxx +; CHECK: do.body: +; CHECK: do.end: +; CHECK-NOT: do.body.1: +define void @s32_maxx(i32 %n, i32 %x, i32* %p) { +entry: + %add = add i32 %x, %n + br label %do.body + +do.body: + %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] + %arrayidx = getelementptr i32, i32* %p, i32 %i.0 + store i32 %i.0, i32* %arrayidx, align 4 + %inc = add i32 %i.0, 1 + %cmp = icmp slt i32 %i.0, %add + br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times + +do.end: + ret void +} + +; Should not be unrolled +; CHECK-LABEL: @s32_max2_unpredictable_exit +; CHECK: do.body: +; CHECK: do.end: +; CHECK-NOT: do.body.1: +define void @s32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) { +entry: + %add = add i32 %n, 2 + br label %do.body + +do.body: + %i.0 = phi i32 [ %n, %entry ], [ %inc, %if.end ] + %cmp = icmp eq i32 %i.0, %x + br i1 %cmp, label %do.end, label %if.end ; unpredictable + +if.end: + %arrayidx = getelementptr i32, i32* %p, i32 %i.0 + store i32 %i.0, i32* %arrayidx, align 4 + %inc = add i32 %i.0, 1 + %cmp1 = icmp slt i32 %i.0, %add + br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times + +do.end: + ret void +} + +; Unroll twice, with first loop exit kept +; CHECK-LABEL: @u32_max1 +; CHECK: do.body: +; CHECK: store +; CHECK: br i1 %cmp, label %do.body.1, label %do.end +; CHECK: do.end: +; CHECK: ret void +; CHECK: do.body.1: +; CHECK: store +; CHECK: br label %do.end +define void @u32_max1(i32 %n, i32* %p) { +entry: + %add = add i32 %n, 1 + br label %do.body + +do.body: + %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] + %arrayidx = getelementptr i32, i32* %p, i32 %i.0 + store i32 %i.0, i32* %arrayidx, align 4 + %inc = add i32 %i.0, 1 + %cmp = icmp ult i32 %i.0, %add + br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times + +do.end: + ret void +} + +; Unroll thrice, with first loop exit kept +; CHECK-LABEL: @u32_max2 +; CHECK: do.body: +; CHECK: store +; CHECK: br i1 %cmp, label %do.body.1, label %do.end +; CHECK: do.end: +; CHECK: ret void +; CHECK: do.body.1: +; CHECK: store +; CHECK: store +; CHECK: br label %do.end +define void @u32_max2(i32 %n, i32* %p) { +entry: + %add = add i32 %n, 2 + br label %do.body + +do.body: + %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] + %arrayidx = getelementptr i32, i32* %p, i32 %i.0 + store i32 %i.0, i32* %arrayidx, align 4 + %inc = add i32 %i.0, 1 + %cmp = icmp ult i32 %i.0, %add + br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times + +do.end: + ret void +} + +; Should not be unrolled +; CHECK-LABEL: @u32_maxx +; CHECK: do.body: +; CHECK: do.end: +; CHECK-NOT: do.body.1: +define void @u32_maxx(i32 %n, i32 %x, i32* %p) { +entry: + %add = add i32 %x, %n + br label %do.body + +do.body: + %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] + %arrayidx = getelementptr i32, i32* %p, i32 %i.0 + store i32 %i.0, i32* %arrayidx, align 4 + %inc = add i32 %i.0, 1 + %cmp = icmp ult i32 %i.0, %add + br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times + +do.end: + ret void +} + +; Should not be unrolled +; CHECK-LABEL: @u32_max2_unpredictable_exit +; CHECK: do.body: +; CHECK: do.end: +; CHECK-NOT: do.body.1: +define void @u32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) { +entry: + %add = add i32 %n, 2 + br label %do.body + +do.body: + %i.0 = phi i32 [ %n, %entry ], [ %inc, %if.end ] + %cmp = icmp eq i32 %i.0, %x + br i1 %cmp, label %do.end, label %if.end ; unpredictable + +if.end: + %arrayidx = getelementptr i32, i32* %p, i32 %i.0 + store i32 %i.0, i32* %arrayidx, align 4 + %inc = add i32 %i.0, 1 + %cmp1 = icmp ult i32 %i.0, %add + br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times + +do.end: + ret void +}