Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -8754,8 +8754,33 @@ const SCEVAddRecExpr *LHS, const SCEV *RHS) { const Loop *L = LHS->getLoop(); - return isLoopEntryGuardedByCond(L, Pred, LHS->getStart(), RHS) && - isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS); + // Check if the condition is met on the first iteration. + if (!isLoopEntryGuardedByCond(L, Pred, LHS->getStart(), RHS)) + return false; + + // If the backedge is guarded by condition for post-inc LHS, then we've + // proved that our predicate will be true on every iteration. + if (isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS)) + return true; + + // If the predicate is monotonic, and we've already proved it on the first + // iteration, it is enough to prove it on the last iteration. The tricky part + // is that the monotonicity could be based on a no-wrap flag which was derived + // from a taken side-exit (for example, a guard), and if we actually did the + // number of iterations returned by getBackedgeTakenCount(), we would have + // overflowed. That is why we make an extra check that we don't overflow even + // if we make this many iterations. + bool Increasing = false; + if (isMonotonicPredicate(LHS, Pred, Increasing) && + isProvedNoOverflowOnNormalExit(LHS, ICmpInst::isSigned(Pred))) { + const SCEV *End = LHS->evaluateAtIteration(getBackedgeTakenCount(L), *this); + if (isAvailableAtLoopEntry(End, L) && + isLoopEntryGuardedByCond(L, Pred, End, RHS)) + return true; + } + + // Failed to prove anything, conservatively return false. 
+ return false; } bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS, Index: test/Transforms/IndVarSimplify/prove_via_monotonicity.ll =================================================================== --- /dev/null +++ test/Transforms/IndVarSimplify/prove_via_monotonicity.ll @@ -0,0 +1,102 @@ +; RUN: opt -indvars -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" +target triple = "x86_64-unknown-linux-gnu" + +declare void @llvm.experimental.guard(i1, ...) + +; Check that we can widen the indvar for the decrementing loop. Phi's range +; doesn't give us enough information, so we have to prove it using monotonicity. + +define void @test_01(i32* %p, i32* %a) { + +; CHECK-LABEL: @test_01( +; CHECK-NOT: trunc + +entry: + %len = load i32, i32* %p, align 4, !range !0 + %len.minus.1 = add nsw i32 %len, -1 + %zero_check = icmp eq i32 %len, 0 + br i1 %zero_check, label %loopexit, label %preheader + +preheader: + br label %loop + +loopexit: + ret void + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ %len.minus.1, %preheader ] + ; CHECK: %indvars.iv = phi i64 + %iv.wide = zext i32 %iv to i64 + %el = getelementptr inbounds i32, i32* %a, i64 %iv.wide + store atomic i32 0, i32* %el unordered, align 4 + %iv.next = add nsw i32 %iv, -1 + ; CHECK: %loopcond = icmp slt i64 %indvars.iv, 1 + %loopcond = icmp slt i32 %iv, 1 + br i1 %loopcond, label %loopexit, label %loop +} + +define void @test_02(i32* %p, i32* %a) { + +; CHECK-LABEL: @test_02( +; CHECK-NOT: trunc + +entry: + %len = load i32, i32* %p, align 4, !range !0 + %len.minus.1 = add nsw i32 %len, -1 + %zero_check = icmp eq i32 %len, 0 + br i1 %zero_check, label %loopexit, label %preheader + +preheader: + br label %loop + +loopexit: + ret void + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ %len.minus.1, %preheader ] + ; CHECK: %indvars.iv = phi i64 + %iv.wide = zext i32 %iv to i64 + %el = getelementptr inbounds i32, i32* %a, i64 %iv.wide + store atomic i32 0, i32* %el 
unordered, align 4 + %iv.next = add nsw i32 %iv, -1 + ; CHECK: %loopcond = icmp ult i64 %indvars.iv, 1 + %loopcond = icmp ult i32 %iv, 1 + br i1 %loopcond, label %loopexit, label %loop +} + +; In this situation the indvar has nsw, but it is derived from the guard. If the +; side exit wasn't present, we would be unable to prove it. Make sure that we do +; not optimize away the guard based on the fact which was derived from this +; very guard. +define void @test_03(i32* %p, i32* %a) { + +; CHECK-LABEL: @test_03 + +entry: + %len = load i32, i32* %p, align 4, !range !0 + %len.minus.1 = add nsw i32 %len, -1 + %zero_check = icmp eq i32 %len, 0 + br i1 %zero_check, label %loopexit, label %preheader + +preheader: + br label %loop + +loopexit: + ret void + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ -2000000000, %preheader ] + %iv.wide = zext i32 %iv to i64 + %el = getelementptr inbounds i32, i32* %a, i64 %iv.wide + %fishy = icmp sgt i32 %iv, -2000000009 + ; CHECK-NOT: call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] + call void(i1, ...) @llvm.experimental.guard(i1 %fishy) [ "deopt"() ] + store atomic i32 0, i32* %el unordered, align 4 + %iv.next = add nsw i32 %iv, -1 + %loopcond = icmp eq i32 %iv, 2000000000 + br i1 %loopcond, label %loopexit, label %loop +} + +!0 = !{i32 0, i32 2147483647}