Index: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h +++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h @@ -1142,6 +1142,9 @@ /// Mark SCEVUnknown Phis currently being processed by getRangeRef. SmallPtrSet PendingPhiRanges; + // Mark SCEVUnknown Phis currently being processed by isImpliedViaMerge. + SmallPtrSet PendingMerges; + /// Set to true by isLoopBackedgeGuardedByCond when we're walking the set of /// conditions dominating the backedge of a loop. bool WalkingBEDominatingConds = false; @@ -1667,6 +1670,18 @@ const SCEV *FoundLHS, const SCEV *FoundRHS); + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. + /// + /// This routine tries to figure out predicate for Phis which are SCEVUnknown + /// if it is true for every possible incoming value from their respective + /// basic blocks. + bool isImpliedViaMerge(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, const SCEV *FoundRHS, + unsigned Depth); + /// If we know that the specified Phi is in the header of its containing /// loop, we know the loop executes a constant number of times, and the PHI /// node is just a recurrence involving constants, fold it. Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp @@ -9518,6 +9518,115 @@ getConstant(FoundRHSLimit)); } +bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS, unsigned Depth) { + const PHINode *LPhi = nullptr, *RPhi = nullptr; + + auto ClearOnExit = make_scope_exit([&]() { + if (LPhi) { + bool Erased = PendingMerges.erase(LPhi); + assert(Erased && "Failed to erase LPhi!"); + (void)Erased; + } + if (RPhi) { + bool Erased = PendingMerges.erase(RPhi); + assert(Erased && "Failed to erase RPhi!"); + (void)Erased; + } + }); + + // Find respective Phis and check that they are not being pending. + if (const SCEVUnknown *LU = dyn_cast(LHS)) + if (auto *Phi = dyn_cast(LU->getValue())) { + if (!PendingMerges.insert(Phi).second) + return false; + LPhi = Phi; + } + if (const SCEVUnknown *RU = dyn_cast(RHS)) + if (auto *Phi = dyn_cast(RU->getValue())) { + // If we detect a loop of Phi nodes being processed by this method, for + // example: + // + // %a = phi i32 [ %some1, %preheader ], [ %b, %latch ] + // %b = phi i32 [ %some2, %preheader ], [ %a, %latch ] + // + // we don't want to deal with a case that complex, so return conservative + // answer false. + if (!PendingMerges.insert(Phi).second) + return false; + RPhi = Phi; + } + + // If none of LHS, RHS is a Phi, nothing to do here. + if (!LPhi && !RPhi) + return false; + + // If there is a SCEVUnknown Phi we are interested in, make it left. + if (!LPhi) { + std::swap(LHS, RHS); + std::swap(FoundLHS, FoundRHS); + std::swap(LPhi, RPhi); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + assert(LPhi && "LPhi should definitely be a SCEVUnknown Phi!"); + const BasicBlock *LBB = LPhi->getParent(); + const SCEVAddRecExpr *RAR = dyn_cast(RHS); + + auto ProvedEasily = [&](const SCEV *S1, const SCEV *S2) { + return isKnownViaNonRecursiveReasoning(Pred, S1, S2) || + isImpliedCondOperandsViaRanges(Pred, S1, S2, FoundLHS, FoundRHS) || + isImpliedViaOperations(Pred, S1, S2, FoundLHS, FoundRHS, Depth); + }; + + if (RPhi && RPhi->getParent() == LBB) { + // Case one: RHS is also a SCEVUnknown Phi from the same basic block. + // If we compare two Phis from the same block, and for each entry block + // the predicate is true for incoming values from this block, then the + // predicate is also true for the Phis. + for (const BasicBlock *IncBB : predecessors(LBB)) { + const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); + const SCEV *R = getSCEV(RPhi->getIncomingValueForBlock(IncBB)); + if (!ProvedEasily(L, R)) + return false; + } + } else if (RAR && RAR->getLoop()->getHeader() == LBB) { + // Case two: RHS is also a Phi from the same basic block, and it is an + // AddRec. It means that there is a loop which has both AddRec and Unknown + // PHIs, for it we can compare incoming values of AddRec from preheader and + // latch with their respective incoming values of LPhi. + assert(LPhi->getNumIncomingValues() == 2 && + "Phi node standing in loop header does not have exactly 2 inputs?"); + auto *RLoop = RAR->getLoop(); + auto *Preheader = RLoop->getLoopPreheader(); + assert(Preheader && "Loop with AddRec with no preheader?"); + const SCEV *L1 = getSCEV(LPhi->getIncomingValueForBlock(Preheader)); + if (!ProvedEasily(L1, RAR->getStart())) + return false; + auto *Latch = RLoop->getLoopLatch(); + assert(Latch && "Loop with AddRec with no latch?"); + const SCEV *L2 = getSCEV(LPhi->getIncomingValueForBlock(Latch)); + if (!ProvedEasily(L2, RAR->getPostIncExpr(*this))) + return false; + } else { + // In all other cases go over inputs of LHS and compare each of them to RHS, + // the predicate is true for (LHS, RHS) if it is true for all such pairs. + // At this point RHS is either a non-Phi, or it is a Phi from some block + // different from LBB. + for (const BasicBlock *IncBB : predecessors(LBB)) { + // Check that RHS is available in this block. + if (!dominates(RHS, IncBB)) + return false; + const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); + if (!ProvedEasily(L, RHS)) + return false; + } + } + return true; +} + bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, @@ -9671,6 +9780,7 @@ }; // Acquire values from extensions. + auto *OrigLHS = LHS; auto *OrigFoundLHS = FoundLHS; LHS = GetOpFromSExt(LHS); FoundLHS = GetOpFromSExt(FoundLHS); @@ -9778,6 +9888,12 @@ } } + // If our expression contained SCEVUnknown Phis, and we split it down and now + // need to prove something for them, try to prove the predicate for every + // possible incoming values of those Phis. + if (isImpliedViaMerge(Pred, OrigLHS, RHS, OrigFoundLHS, FoundRHS, Depth + 1)) + return true; + return false; } @@ -10863,6 +10979,7 @@ ValueExprMap(std::move(Arg.ValueExprMap)), PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)), PendingPhiRanges(std::move(Arg.PendingPhiRanges)), + PendingMerges(std::move(Arg.PendingMerges)), MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)), BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), PredicatedBackedgeTakenCounts( @@ -10907,6 +11024,7 @@ assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); assert(PendingPhiRanges.empty() && "getRangeRef garbage"); + assert(PendingMerges.empty() && "isImpliedViaMerge garbage"); assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!"); } Index: llvm/trunk/test/Transforms/IRCE/decrementing-loop.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/decrementing-loop.ll +++ llvm/trunk/test/Transforms/IRCE/decrementing-loop.ll @@ -119,5 +119,148 @@ ret void } +; Check that we can figure out that IV is non-negative via implication through +; Phi node. +define void @test_03(i32* %a, i32* %a_len_ptr, i1 %cond) { + +; CHECK-LABEL: test_03 +; CHECK: mainloop: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK: br i1 true, label %in.bounds, label %out.of.bounds +; CHECK: loop.preloop: + + entry: + %len.a = load i32, i32* %a_len_ptr, !range !0 + %len.minus.one = sub nsw i32 %len.a, 1 + %len.minus.two = sub nsw i32 %len.a, 2 + br i1 %cond, label %if.true, label %if.false + +if.true: + br label %merge + +if.false: + br label %merge + +merge: + %starting.value = phi i32 [ %len.minus.two, %if.true ], [ %len.minus.one, %if.false ] + %first.itr.check = icmp sgt i32 %len.a, 3 + br i1 %first.itr.check, label %loop, label %exit + +loop: + %idx = phi i32 [ %starting.value, %merge ] , [ %idx.next, %in.bounds ] + %idx.next = sub i32 %idx, 1 + %rc = icmp ult i32 %idx.next, %len.a + br i1 %rc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %el.a = getelementptr i32, i32* %a, i32 %idx.next + %v = load i32, i32* %el.a + %loop.cond = icmp slt i32 %idx, 2 + br i1 %loop.cond, label %exit, label %loop + +out.of.bounds: + ret void + +exit: + ret void +} + +; Check that we can figure out that IV is non-negative via implication through +; two Phi nodes. +define void @test_04(i32* %a, i32* %a_len_ptr, i1 %cond) { + +; CHECK-LABEL: test_04 +; CHECK: mainloop: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK: br i1 true, label %in.bounds, label %out.of.bounds +; CHECK: loop.preloop: + + entry: + %len.a = load i32, i32* %a_len_ptr, !range !0 + %len.minus.one = sub nsw i32 %len.a, 1 + %len.plus.one = add nsw i32 %len.a, 1 + %len.minus.two = sub nsw i32 %len.a, 2 + br i1 %cond, label %if.true, label %if.false + +if.true: + br label %merge + +if.false: + br label %merge + +merge: + %starting.value = phi i32 [ %len.minus.two, %if.true ], [ %len.minus.one, %if.false ] + %len.phi = phi i32 [ %len.a, %if.true ], [ %len.plus.one, %if.false ] + %first.itr.check = icmp sgt i32 %len.a, 3 + br i1 %first.itr.check, label %loop, label %exit + +loop: + %idx = phi i32 [ %starting.value, %merge ] , [ %idx.next, %in.bounds ] + %idx.next = sub i32 %idx, 1 + %rc = icmp ult i32 %idx.next, %len.phi + br i1 %rc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %el.a = getelementptr i32, i32* %a, i32 %idx.next + %v = load i32, i32* %el.a + %loop.cond = icmp slt i32 %idx, 2 + br i1 %loop.cond, label %exit, label %loop + +out.of.bounds: + ret void + +exit: + ret void +} + +; Check that we can figure out that IV is non-negative via implication through +; two Phi nodes, one being AddRec. +define void @test_05(i32* %a, i32* %a_len_ptr, i1 %cond) { + +; CHECK-LABEL: test_05 +; CHECK: mainloop: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK: br i1 true, label %in.bounds, label %out.of.bounds +; CHECK: loop.preloop: + + entry: + %len.a = load i32, i32* %a_len_ptr, !range !0 + %len.minus.one = sub nsw i32 %len.a, 1 + %len.plus.one = add nsw i32 %len.a, 1 + %len.minus.two = sub nsw i32 %len.a, 2 + br label %merge + +merge: + %starting.value = phi i32 [ %len.minus.two, %entry ], [ %len.minus.one, %merge ] + %len.phi = phi i32 [ %len.a, %entry ], [ %len.phi.next, %merge ] + %len.phi.next = add nsw i32 %len.phi, 1 + br i1 true, label %first.iter.check, label %merge + +first.iter.check: + %first.itr.check = icmp sgt i32 %len.a, 3 + br i1 %first.itr.check, label %loop, label %exit + +loop: + %idx = phi i32 [ %starting.value, %first.iter.check ] , [ %idx.next, %in.bounds ] + %idx.next = sub i32 %idx, 1 + %rc = icmp ult i32 %idx.next, %len.phi + br i1 %rc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %el.a = getelementptr i32, i32* %a, i32 %idx.next + %v = load i32, i32* %el.a + %loop.cond = icmp slt i32 %idx, 2 + br i1 %loop.cond, label %exit, label %loop + +out.of.bounds: + ret void + +exit: + ret void +} + !0 = !{i32 0, i32 2147483647} !1 = !{!"branch_weights", i32 64, i32 4} Index: llvm/trunk/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll @@ -188,5 +188,43 @@ br i1 %loopcond, label %loopexit, label %loop } +define void @promote_latch_condition_decrementing_loop_04(i32* %p, i32* %a, i1 %cond) { + +; CHECK-LABEL: @promote_latch_condition_decrementing_loop_04( +; CHECK-NOT: trunc + +entry: + %len = load i32, i32* %p, align 4, !range !0 + %len.minus.1 = add nsw i32 %len, -1 + br i1 %cond, label %if.true, label %if.false + +if.true: + br label %merge + +if.false: + br label %merge + +merge: + %iv_start = phi i32 [ %len, %if.true ], [%len.minus.1, %if.false ] + %zero_check = icmp eq i32 %len, 0 + br i1 %zero_check, label %loopexit, label %preheader + +preheader: + br label %loop + +loopexit: + ret void + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ %iv_start, %preheader ] + ; CHECK: %indvars.iv = phi i64 + %iv.wide = zext i32 %iv to i64 + %el = getelementptr inbounds i32, i32* %a, i64 %iv.wide + store atomic i32 0, i32* %el unordered, align 4 + %iv.next = add nsw i32 %iv, -1 + ; CHECK: %loopcond = icmp slt i64 %indvars.iv, 1 + %loopcond = icmp slt i32 %iv, 1 + br i1 %loopcond, label %loopexit, label %loop +} !0 = !{i32 0, i32 2147483647}