Index: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
+++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
@@ -803,6 +803,13 @@
   /// Cache for \c loopHasNoAbnormalExits.
   DenseMap<const Loop *, bool> LoopHasNoAbnormalExits;
 
+  /// Cache for \c loopHasNoSideEffects.
+  DenseMap<const Loop *, bool> LoopHasNoSideEffects;
+
+  /// Returns true if \p L contains no instruction that can have side effects
+  /// (i.e. by throwing an exception or via a volatile or atomic access).
+  bool loopHasNoSideEffects(const Loop *L);
+
   /// Returns true if \p L contains no instruction that can abnormally exit
   /// the loop (i.e. via throwing an exception, by terminating the thread
   /// cleanly or by infinite looping in a called function). Strictly
Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolution.cpp
+++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp
@@ -4949,6 +4949,28 @@
   return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);
 }
 
+bool ScalarEvolution::loopHasNoSideEffects(const Loop *L) {
+  auto Itr = LoopHasNoSideEffects.find(L);
+  if (Itr == LoopHasNoSideEffects.end()) {
+    auto NoSideEffectsInBB = [&](BasicBlock *BB) {
+      return all_of(*BB, [](Instruction &I) {
+        // Non-atomic, non-volatile stores are ok.
+        if (auto *SI = dyn_cast<StoreInst>(&I))
+          return SI->isSimple();
+
+        return !I.mayHaveSideEffects();
+      });
+    };
+
+    auto InsertPair = LoopHasNoSideEffects.insert(
+        {L, all_of(L->getBlocks(), NoSideEffectsInBB)});
+    assert(InsertPair.second && "We just checked!");
+    Itr = InsertPair.first;
+  }
+
+  return Itr->second;
+}
+
 bool ScalarEvolution::loopHasNoAbnormalExits(const Loop *L) {
   auto Itr = LoopHasNoAbnormalExits.find(L);
   if (Itr == LoopHasNoAbnormalExits.end()) {
@@ -5536,6 +5558,7 @@
     forgetLoop(I);
 
   LoopHasNoAbnormalExits.erase(L);
+  LoopHasNoSideEffects.erase(L);
 }
 
 void ScalarEvolution::forgetValue(Value *V) {
@@ -8678,11 +8701,15 @@
     return getCouldNotCompute();
 
   const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
-  if (!IV && AllowPredicates)
+  bool PredicatedIV = false;
+
+  if (!IV && AllowPredicates) {
     // Try to make this an AddRec using runtime tests, in the first X
     // iterations of this loop, where X is the SCEV expression found by the
     // algorithm below.
     IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
+    PredicatedIV = true;
+  }
 
   // Avoid weird loops
   if (!IV || IV->getLoop() != L || !IV->isAffine())
@@ -8693,9 +8720,55 @@
 
   const SCEV *Stride = IV->getStepRecurrence(*this);
 
-  // Avoid negative or zero stride values
-  if (!isKnownPositive(Stride))
-    return getCouldNotCompute();
+  bool PositiveStride = isKnownPositive(Stride);
+
+  // Avoid negative or zero stride values.
+  if (!PositiveStride)
+    // We can compute the correct backedge taken count for loops with unknown
+    // strides if we can prove that the loop is not an infinite loop with side
+    // effects. Here's the loop structure we are trying to handle -
+    //
+    // i = start
+    // do {
+    //   A[i] = i;
+    //   i += s;
+    // } while (i < end);
+    //
+    // The backedge taken count for such loops is evaluated as -
+    // (max(end, start + stride) - start - 1) /u stride
+    //
+    // The additional preconditions that we need to check to prove correctness
+    // of the above formula are as follows -
+    //
+    // a) IV is either nuw or nsw depending upon signedness (indicated by the
+    //    NoWrap flag).
+    // b) the loop is a single-exit loop with no side effects.
+    //
+    //
+    // Precondition a) implies that if the stride is negative, this is a single
+    // trip loop. The backedge taken count formula reduces to zero in this case.
+    //
+    // Precondition b) implies that the unknown stride cannot be zero; otherwise
+    // we have UB.
+    //
+    // The positive stride case is the same as isKnownPositive(Stride) returning
+    // true (original behavior of the function).
+    //
+    // We want to make sure that the stride is truly unknown as there are edge
+    // cases where ScalarEvolution propagates no wrap flags to the
+    // post-increment/decrement IV even though the increment/decrement operation
+    // itself is wrapping. The computed backedge taken count may be wrong in
+    // such cases. This is prevented by checking that the stride is not known to
+    // be either positive or non-positive. For example, no wrap flags are
+    // propagated to the post-increment IV of this loop with a trip count of 2 -
+    //
+    // unsigned char i;
+    // for(i=127; i<128; i+=129)
+    //   A[i] = i;
+    //
+    if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||
+        !loopHasNoSideEffects(L))
+      return getCouldNotCompute();
 
   // Avoid proven overflow cases: this will ensure that the backedge taken count
   // will not generate any unsigned overflow. Relaxed no-overflow conditions
Index: llvm/trunk/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
===================================================================
--- llvm/trunk/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
+++ llvm/trunk/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+; ScalarEvolution should be able to compute the trip count of this loop by
+; proving that it is not an infinite loop with side effects.
+
+; CHECK: Determining loop execution counts for: @foo1
+; CHECK: backedge-taken count is ((-1 + %n) /u %s)
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+
+; Function Attrs: norecurse nounwind
+define void @foo1(i32* nocapture %A, i32 %n, i32 %s) #0 {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.05, %s
+  %cmp = icmp slt i32 %add, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+; Check that we are able to compute the trip count of a loop without an entry guard.
+; CHECK: Determining loop execution counts for: @foo2
+; CHECK: backedge-taken count is ((-1 + (%n smax %s)) /u %s)
+
+; Function Attrs: norecurse nounwind
+define void @foo2(i32* nocapture %A, i32 %n, i32 %s) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.05, %s
+  %cmp = icmp slt i32 %add, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
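
The backedge taken count formula from the comment above is easy to check in isolation. Below is a standalone C++ sketch, not part of the patch; the helper names backedgeTakenCount and countBackedges are invented for illustration. It evaluates (max(end, start + stride) - start - 1) /u stride with the smax done in signed arithmetic and the subtraction and division done modulo 2^32, mirroring the SCEV expression ((-1 + (%n smax %s)) /u %s) checked in the new test, and compares the result against literally executing the do-while loop under precondition a):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

// Backedge taken count of: i = start; do { A[i] = i; i += stride; } while (i < end);
// computed as (max(end, start + stride) - start - 1) /u stride.
static uint32_t backedgeTakenCount(int32_t Start, int32_t End, int32_t Stride) {
  uint32_t Max = (uint32_t)std::max(End, Start + Stride); // signed max (smax)
  return (Max - (uint32_t)Start - 1u) / (uint32_t)Stride; // unsigned division (/u)
}

// Reference: run the loop and count how many times the backedge is taken.
static uint32_t countBackedges(int32_t Start, int32_t End, int32_t Stride) {
  uint32_t Taken = 0;
  int32_t I = Start;
  do {
    I += Stride; // precondition a): the increment does not wrap
    if (I < End)
      ++Taken;
  } while (I < End);
  return Taken;
}

int main() {
  // Start at 0 as @foo2 does; strides larger than the bound give zero backedges.
  for (int32_t N : {1, 7, 100})
    for (int32_t S : {1, 3, 150})
      assert(backedgeTakenCount(0, N, S) == countBackedges(0, N, S));
  std::puts("formula agrees with the reference loop");
  return 0;
}

With the entry guard in @foo1 proving %n > 0, the smax folds away and the count simplifies to ((-1 + %n) /u %s), which is what the first CHECK line expects.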
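
The wrapping caveat at the end of the long comment can be worked through by hand. In the unsigned char example, i goes 127 -> (127 + 129) mod 256 = 0 -> 0 + 129 = 129, so the body runs twice. Plugging the wrapped 8-bit values into the formula instead yields (max(128, 0) - 127 - 1) /u 129 = 0 backedges, i.e. a trip count of 1, which is wrong; hence the bail-out unless the stride is neither known positive nor known non-positive and the no wrap flags genuinely hold. A minimal C++ sketch of that loop (the Trips counter is added for illustration):

#include <cstdint>
#include <cstdio>

int main() {
  // i: 127 -> (127 + 129) mod 256 = 0 -> 0 + 129 = 129; the body runs twice.
  unsigned Trips = 0;
  for (uint8_t I = 127; I < 128; I = (uint8_t)(I + 129))
    ++Trips;
  std::printf("Trips = %u\n", Trips); // prints 2
  return 0;
}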