Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h +++ include/llvm/Analysis/ScalarEvolution.h @@ -1102,6 +1102,9 @@ /// Mark SCEVUnknown Phis currently being processed by getRangeRef. SmallPtrSet PendingPhiRanges; + // Mark SCEVUnknown Phis currently being processed by isImpliedViaMerge. + SmallPtrSet PendingMerges; + /// Set to true by isLoopBackedgeGuardedByCond when we're walking the set of /// conditions dominating the backedge of a loop. bool WalkingBEDominatingConds = false; @@ -1628,6 +1631,18 @@ const SCEV *FoundLHS, const SCEV *FoundRHS); + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. + /// + /// This routine tries to figure out predicate for Phis which are SCEVUnknown + /// if it is true for every possible incoming value from their respective + /// basic blocks. + bool isImpliedViaMerge(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, const SCEV *FoundRHS, + unsigned Depth); + /// If we know that the specified Phi is in the header of its containing /// loop, we know the loop executes a constant number of times, and the PHI /// node is just a recurrence involving constants, fold it. Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -9535,6 +9535,109 @@ getConstant(FoundRHSLimit)); } +bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS, + unsigned Depth) { + const PHINode *LPhi = nullptr, *RPhi = nullptr; + + auto ClearOnExit = make_scope_exit([&]() { + if (LPhi) { + bool Erased = PendingMerges.erase(LPhi); + assert(Erased && "Failed to erase LPhi!"); + (void) Erased; + } + if (RPhi) { + bool Erased = PendingMerges.erase(RPhi); + assert(Erased && "Failed to erase RPhi!"); + (void) Erased; + } + }); + + // Find respective Phis and check that they are not being pending. + if (const SCEVUnknown *LU = dyn_cast(LHS)) + if (auto *Phi = dyn_cast(LU->getValue())) { + if (!PendingMerges.insert(Phi).second) + return false; + LPhi = Phi; + } + if (const SCEVUnknown *RU = dyn_cast(RHS)) + if (auto *Phi = dyn_cast(RU->getValue())) { + if (!PendingMerges.insert(Phi).second) + return false; + RPhi = Phi; + } + + // If none of LHS, RHS is a Phi, nothing to do here. + if (!LPhi && !RPhi) + return false; + + auto ProvedEasily = [&](const SCEV *S1, const SCEV *S2) { + return isKnownViaNonRecursiveReasoning(Pred, S1, S2) || + isImpliedCondOperandsViaRanges(Pred, S1, S2, FoundLHS, FoundRHS) || + isImpliedViaOperations(Pred, S1, S2, FoundLHS, FoundRHS, Depth); + }; + + // For Phi nodes, we should prove the predicate for every incoming value that + // it can possibly take. + if (LPhi && RPhi) { + // If both are Phis, then they should be from one basic block. + if (LPhi->getParent() != RPhi->getParent()) + return false; + // If we compare two Phis from the same block, and for each entry block + // the predicate is true for icoming values from this block, then the + // predicate is also true for the Phis. + for (const BasicBlock *IncBB : predecessors(LPhi->getParent())) { + // Ignore unreachable predecessors. + if (!DT.isReachableFromEntry(IncBB)) + continue; + const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); + const SCEV *R = getSCEV(RPhi->getIncomingValueForBlock(IncBB)); + assert(dominates(L, IncBB) && "Dominance broken?"); + assert(dominates(R, IncBB) && "Dominance broken?"); + if (!ProvedEasily(L, R)) + return false; + } + return true; + } else if (LPhi) { + if (!properlyDominates(RHS, LPhi->getParent())) + return false; + for (const BasicBlock *IncBB : predecessors(LPhi->getParent())) { + // Ignore unreachable predecessors. + if (!DT.isReachableFromEntry(IncBB)) + continue; + // TODO: It is AddRec Phi being compared against SCEVUnknown Phi; we can + // handle this case later, so far we bail. + if (!dominates(RHS, IncBB)) + return false; + const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); + assert(dominates(L, IncBB) && "Dominance broken?"); + if (!ProvedEasily(L, RHS)) + return false; + } + return true; + } else { + assert(RPhi && "Checked above!"); + if (!properlyDominates(LHS, RPhi->getParent())) + return false; + for (const BasicBlock *IncBB : predecessors(RPhi->getParent())) { + // Ignore unreachable predecessors. + if (!DT.isReachableFromEntry(IncBB)) + continue; + // TODO: It is AddRec Phi being compared against SCEVUnknown Phi; we can + // handle this case later, so far we bail. + if (!dominates(LHS, IncBB)) + return false; + const SCEV *R = getSCEV(RPhi->getIncomingValueForBlock(IncBB)); + assert(dominates(R, IncBB) && "Dominance broken?"); + if (!ProvedEasily(LHS, R)) + return false; + } + return true; + } +} + bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, @@ -9688,6 +9791,7 @@ }; // Acquire values from extensions. + auto *OrigLHS = LHS; auto *OrigFoundLHS = FoundLHS; LHS = GetOpFromSExt(LHS); FoundLHS = GetOpFromSExt(FoundLHS); @@ -9795,6 +9899,12 @@ } } + // If our expression contained SCEVUnknown Phis, and we split it down and now + // need to prove something for them, try to prove the predicate for every + // possible incoming values of those Phis. + if (isImpliedViaMerge(Pred, OrigLHS, RHS, OrigFoundLHS, FoundRHS, Depth + 1)) + return true; + return false; } @@ -10880,6 +10990,7 @@ ValueExprMap(std::move(Arg.ValueExprMap)), PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)), PendingPhiRanges(std::move(Arg.PendingPhiRanges)), + PendingMerges(std::move(Arg.PendingMerges)), MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)), BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), PredicatedBackedgeTakenCounts( @@ -10924,6 +11035,7 @@ assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); assert(PendingPhiRanges.empty() && "getRangeRef garbage"); + assert(PendingMerges.empty() && "isImpliedViaMerge garbage"); assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!"); } Index: test/Transforms/IRCE/decrementing-loop.ll =================================================================== --- test/Transforms/IRCE/decrementing-loop.ll +++ test/Transforms/IRCE/decrementing-loop.ll @@ -118,5 +118,52 @@ ret void } +; Check that we can figure out that IV is non-negative via implication through +; Phi node. +define void @test_03(i32* %a, i32* %a_len_ptr, i1 %cond) { + +; CHECK-LABEL: test_03 +; CHECK: mainloop: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK: br i1 true, label %in.bounds, label %out.of.bounds +; CHECK: loop.preloop: + + entry: + %len.a = load i32, i32* %a_len_ptr, !range !0 + %len.minus.one = sub nsw i32 %len.a, 1 + %len.minus.two = sub nsw i32 %len.a, 2 + br i1 %cond, label %if.true, label %if.false + +if.true: + br label %merge + +if.false: + br label %merge + +merge: + %starting.value = phi i32 [ %len.minus.two, %if.true ], [ %len.minus.one, %if.false ] + %first.itr.check = icmp sgt i32 %len.a, 3 + br i1 %first.itr.check, label %loop, label %exit + +loop: + %idx = phi i32 [ %starting.value, %merge ] , [ %idx.next, %in.bounds ] + %idx.next = sub i32 %idx, 1 + %rc = icmp ult i32 %idx.next, %len.a + br i1 %rc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %el.a = getelementptr i32, i32* %a, i32 %idx.next + %v = load i32, i32* %el.a + %loop.cond = icmp slt i32 %idx, 2 + br i1 %loop.cond, label %exit, label %loop + +out.of.bounds: + ret void + +exit: + ret void +} + !0 = !{i32 0, i32 2147483647} !1 = !{!"branch_weights", i32 64, i32 4} Index: test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll =================================================================== --- test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll +++ test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll @@ -131,4 +131,43 @@ br i1 %loopcond, label %loopexit, label %loop } +define void @promote_latch_condition_decrementing_loop_02(i32* %p, i32* %a, i1 %cond) { + +; CHECK-LABEL: @promote_latch_condition_decrementing_loop_02( +; CHECK-NOT: trunc + +entry: + %len = load i32, i32* %p, align 4, !range !0 + %len.minus.1 = add nsw i32 %len, -1 + br i1 %cond, label %if.true, label %if.false + +if.true: + br label %merge + +if.false: + br label %merge + +merge: + %iv_start = phi i32 [ %len, %if.true ], [%len.minus.1, %if.false ] + %zero_check = icmp eq i32 %len, 0 + br i1 %zero_check, label %loopexit, label %preheader + +preheader: + br label %loop + +loopexit: + ret void + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ %iv_start, %preheader ] + ; CHECK: %indvars.iv = phi i64 + %iv.wide = zext i32 %iv to i64 + %el = getelementptr inbounds i32, i32* %a, i64 %iv.wide + store atomic i32 0, i32* %el unordered, align 4 + %iv.next = add nsw i32 %iv, -1 + ; CHECK: %loopcond = icmp slt i64 %indvars.iv, 1 + %loopcond = icmp slt i32 %iv, 1 + br i1 %loopcond, label %loopexit, label %loop +} + !0 = !{i32 0, i32 2147483647} Index: test/Transforms/LoopPredication/prove-via-merge.ll =================================================================== --- /dev/null +++ test/Transforms/LoopPredication/prove-via-merge.ll @@ -0,0 +1,46 @@ +; RUN: opt -S -loop-predication < %s 2>&1 | FileCheck %s +; RUN: opt -S -passes='require,loop(loop-predication)' < %s 2>&1 | FileCheck %s + +declare void @llvm.experimental.guard(i1, ...) + +; Just demonstrate that this test does not crash with assertion on ProveViaMerge +; while trying to predicate a loop. +define void @test(i32* %p, i32* %len.p, i1 %some.cond) #0 { + +; CHECK-LABEL: @test + +entry: + %loaded = load i32, i32* %p, align 4, !range !0 + %length.i = load i32, i32* %len.p, align 8, !range !0 + br i1 %some.cond, label %exit, label %loop + +loop: ; preds = %latch.outer, %entry + %iv.outer = phi i32 [ %iv.outer.next, %latch.outer ], [ %loaded, %entry ] + %scev.unknown = phi i32 [ 1, %latch.outer ], [ %loaded, %entry ] + %tmp3 = icmp slt i32 %loaded, %length.i + %tmp4 = add i32 %scev.unknown, 1 + %tmp5 = icmp ult i32 %tmp4, %length.i + %cond = and i1 %tmp5, %tmp3 + br i1 %cond, label %loop_inner, label %bail + +loop_inner: ; preds = %loop_inner, %loop + %iv.inner = phi i32 [ %iv.inner.next, %loop_inner ], [ %scev.unknown, %loop ] + %iv.inner.next = add nsw i32 %iv.inner, 1 + %len.ne.zero = icmp ne i32 %length.i, 0 + call void (i1, ...) @llvm.experimental.guard(i1 %len.ne.zero) [ "deopt"() ] + %inner.cond = icmp slt i32 %iv.inner.next, %iv.outer + br i1 %inner.cond, label %loop_inner, label %latch.outer + +latch.outer: ; preds = %loop_inner + %iv.outer.next = add i32 %iv.outer, 1 + %outer.cond = icmp sgt i32 %iv.outer.next, 85 + br i1 %outer.cond, label %exit, label %loop + +exit: ; preds = %latch.outer, %entry + ret void + +bail: ; preds = %loop + unreachable +} + +!0 = !{i32 0, i32 2147483647}