Index: llvm/include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- llvm/include/llvm/Analysis/ScalarEvolution.h
+++ llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -803,6 +803,13 @@
   /// Cache for \c loopHasNoAbnormalExits.
   DenseMap<const Loop *, bool> LoopHasNoAbnormalExits;
 
+  /// Cache for \c loopHasNoSideEffects.
+  DenseMap<const Loop *, bool> LoopHasNoSideEffects;
+
+  /// Returns true if \p L contains no instruction that can have side effects
+  /// (e.g. via throwing an exception, or via a volatile or atomic access).
+  bool loopHasNoSideEffects(const Loop *L);
+
   /// Returns true if \p L contains no instruction that can abnormally exit
   /// the loop (i.e. via throwing an exception, by terminating the thread
   /// cleanly or by infinite looping in a called function). Strictly
Index: llvm/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- llvm/lib/Analysis/ScalarEvolution.cpp
+++ llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4947,6 +4947,28 @@
   return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);
 }
 
+bool ScalarEvolution::loopHasNoSideEffects(const Loop *L) {
+  auto Itr = LoopHasNoSideEffects.find(L);
+  if (Itr == LoopHasNoSideEffects.end()) {
+    auto NoSideEffectsInBB = [&](BasicBlock *BB) {
+      return all_of(*BB, [](Instruction &I) {
+        // Non-atomic, non-volatile stores are ok.
+        if (auto *SI = dyn_cast<StoreInst>(&I))
+          return SI->isSimple();
+
+        return !I.mayHaveSideEffects();
+      });
+    };
+
+    auto InsertPair = LoopHasNoSideEffects.insert(
+        {L, all_of(L->getBlocks(), NoSideEffectsInBB)});
+    assert(InsertPair.second && "We just checked!");
+    Itr = InsertPair.first;
+  }
+
+  return Itr->second;
+}
+
 bool ScalarEvolution::loopHasNoAbnormalExits(const Loop *L) {
   auto Itr = LoopHasNoAbnormalExits.find(L);
   if (Itr == LoopHasNoAbnormalExits.end()) {
@@ -5534,6 +5556,7 @@
     forgetLoop(I);
 
   LoopHasNoAbnormalExits.erase(L);
+  LoopHasNoSideEffects.erase(L);
 }
 
 void ScalarEvolution::forgetValue(Value *V) {
@@ -8701,9 +8724,29 @@
 
   const SCEV *Stride = IV->getStepRecurrence(*this);
 
-  // Avoid negative or zero stride values
-  if (!isKnownPositive(Stride))
-    return getCouldNotCompute();
+  bool PositiveStride = isKnownPositive(Stride);
+
+  // Avoid negative or zero stride values.
+  if (!PositiveStride)
+    // We can compute the correct backedge taken count for loops with unknown
+    // strides if we can prove that the loop is not an infinite loop with
+    // side effects. The checks below verify this. Here's an example of the
+    // loop we are trying to handle:
+    //
+    //   for (int i = 0; i < n; i += s)
+    //     A[i] = i;
+    //
+    // We also want to make sure that the stride is truly unknown, as there
+    // are cases where the stride is known to be negative and ScalarEvolution
+    // propagates no wrap flags to the post-increment/decrement IV; the
+    // computed backedge taken count may be wrong in such cases. For example:
+    //
+    //   unsigned char i;
+    //   for (i = 127; i < 128; i += 129)
+    //     A[i] = i;
+    //
+    if (!NoWrap || isKnownNonPositive(Stride) || !loopHasNoSideEffects(L))
+      return getCouldNotCompute();
 
   // Avoid proven overflow cases: this will ensure that the backedge taken count
   // will not generate any unsigned overflow. Relaxed no-overflow conditions
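
For intuition, the arithmetic behind the expected counts in the new test
(a sketch, assuming a positive stride %s and %n > 0): the IV starts at 0 and
the loop exits once i >= n, so the body runs ceil(n / s) times and the
backedge-taken count is

    ceil(n / s) - 1  ==  (n - 1) /u s

matching the CHECK line for @foo1. In @foo2 there is no entry guard, so the
body executes at least once and the exit value of the post-increment IV is at
least s; that is why %n is clamped to (%n smax %s) in the second expected
count, ((-1 + (%n smax %s)) /u %s).
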
Index: llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
===================================================================
--- llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
+++ llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+; ScalarEvolution should be able to compute the trip count of the loop by
+; proving that this is not an infinite loop with side effects.
+
+; CHECK: Determining loop execution counts for: @foo1
+; CHECK: backedge-taken count is ((-1 + %n) /u %s)
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+
+; Function Attrs: norecurse nounwind
+define void @foo1(i32* nocapture %A, i32 %n, i32 %s) #0 {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.05, %s
+  %cmp = icmp slt i32 %add, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+; Check that we are able to compute the trip count of a loop without an
+; entry guard.
+; CHECK: Determining loop execution counts for: @foo2
+; CHECK: backedge-taken count is ((-1 + (%n smax %s)) /u %s)
+
+; Function Attrs: norecurse nounwind
+define void @foo2(i32* nocapture %A, i32 %n, i32 %s) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.05, %s
+  %cmp = icmp slt i32 %add, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
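
For reference, the C-level shape of the loops this change does and does not
affect (an illustrative sketch; the function names are made up):

    /* Only simple (non-volatile, non-atomic) stores in the body, so an
     * infinite loop here would have no observable behavior. SCEV can
     * therefore assume termination and, with this patch, compute the
     * backedge-taken count ((-1 + n) /u s) even though s is unknown. */
    void stride_unknown_ok(int *A, int n, int s) {
      for (int i = 0; i < n; i += s)
        A[i] = i;
    }

    /* A volatile store is not "simple" (StoreInst::isSimple() is false),
     * so loopHasNoSideEffects returns false and the unknown-stride path
     * above still returns CouldNotCompute. */
    void stride_unknown_not_ok(volatile int *A, int n, int s) {
      for (int i = 0; i < n; i += s)
        A[i] = i;
    }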