Index: llvm/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/include/llvm/Analysis/ScalarEvolution.h +++ llvm/include/llvm/Analysis/ScalarEvolution.h @@ -917,6 +917,11 @@ bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); + /// Test if the given expression is known to satisfy the condition described + /// by Pred, LHS, and RHS in the basic block BB. + bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, const BasicBlock *BB); + /// Test if the condition described by Pred, LHS, RHS is known to be true on /// every iteration of the loop of the recurrency LHS. bool isKnownOnEveryIteration(ICmpInst::Predicate Pred, Index: llvm/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/lib/Analysis/ScalarEvolution.cpp +++ llvm/lib/Analysis/ScalarEvolution.cpp @@ -9100,6 +9100,13 @@ return isKnownViaNonRecursiveReasoning(Pred, LHS, RHS); } +bool ScalarEvolution::isKnownPredicateAt(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const BasicBlock *BB) { + return isKnownPredicate(Pred, LHS, RHS) || + isBasicBlockEntryGuardedByCond(BB, Pred, LHS, RHS); +} + bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred, const SCEVAddRecExpr *LHS, const SCEV *RHS) { Index: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -2330,9 +2330,9 @@ } // Returns true if the condition of \p BI being checked is invariant and can be -// proved to be trivially true. +// proved to be trivially true during at least first \p MaxIter iterations. 
static bool isTrivialCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE, - bool ProvingLoopExit) { + bool ProvingLoopExit, const SCEV *MaxIter) { ICmpInst::Predicate Pred; Value *LHS, *RHS; using namespace PatternMatch; @@ -2355,10 +2355,74 @@ const SCEV *LHSS = SE->getSCEVAtScope(LHS, L); const SCEV *RHSS = SE->getSCEVAtScope(RHS, L); // Can we prove it to be trivially true? - if (SE->isKnownPredicate(Pred, LHSS, RHSS)) + BasicBlock *BB = BI->getParent(); + if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BB)) return true; - return false; + if (ProvingLoopExit) + return false; + // If we are proving that we stay in loop, try to prove the following set of + // facts: + // - The predicate is true on the 1st iteration; + // - The predicate is still true on suggested last iteration; + // - No overflow happens in between. + + auto *AR = dyn_cast<SCEVAddRecExpr>(LHSS); + // TODO: Lift affinity limitation in the future. + if (!AR || !AR->isAffine()) + return false; + + // The predicate must be monotonic. + switch (Pred) { + default: + return false; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGE: + break; + } + + // First, check the predicate on the 1st iteration. + const SCEV *Start = AR->getStart(); + if (!SE->isKnownPredicateAt(Pred, Start, RHSS, BB)) + return false; + + // TODO: Support steps other than +/- 1. + const SCEV *Step = AR->getOperand(1); + auto *One = SE->getOne(Step->getType()); + auto *MinusOne = SE->getNegativeSCEV(One); + if (Step != One && Step != MinusOne) + return false; + + // Type mismatch here means that MaxIter is potentially larger than max + // unsigned value in start type, which means we cannot prove no wrap for the + // indvar. + if (Start->getType() != MaxIter->getType()) + return false; + // Value of IV on suggested last iteration. 
+ const SCEV *Last = AR->evaluateAtIteration(MaxIter, *SE); + // Does it still meet the requirement? + if (!SE->isKnownPredicateAt(Pred, Last, RHSS, BB)) + return false; + // Because step is +/- 1 and MaxIter has same type as Start (i.e. it does + // not exceed max unsigned value of this type), this effectively proves + // that there is no wrap during the iteration. To prove that there is no + // signed/unsigned wrap, we need to check that + // Start <= Last for step = 1 or Start >= Last for step = -1. + ICmpInst::Predicate NoOverflowPred = + CmpInst::isSigned(Pred) ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; + if (Step == MinusOne) + NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred); + if (!SE->isKnownPredicateAt(NoOverflowPred, Start, Last, BB)) + return false; + + // Everything is fine. + return true; } bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { @@ -2433,18 +2497,20 @@ for (BasicBlock *ExitingBB : ExitingBlocks) { const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); if (isa<SCEVCouldNotCompute>(ExitCount)) { + auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); + auto OptimizeCond = [&](bool Inverted, const SCEV *MaxIter) { + if (isTrivialCond(L, BI, SE, Inverted, MaxIter)) { + FoldExit(ExitingBB, Inverted); + return true; + } + return false; + }; // Okay, we do not know the exit count here. Can we at least prove that it // will remain the same within iteration space? 
- if (isTrivialCond(L, cast<BranchInst>(ExitingBB->getTerminator()), SE, - false)) { - FoldExit(ExitingBB, false); + if (OptimizeCond(false, MaxExitCount)) Changed = true; - } - if (isTrivialCond(L, cast<BranchInst>(ExitingBB->getTerminator()), SE, - true)) { - FoldExit(ExitingBB, true); + else if (OptimizeCond(true, MaxExitCount)) Changed = true; - } continue; } Index: llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll +++ llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll @@ -12,8 +12,7 @@ ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], -1 -; CHECK-NEXT: [[RC:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]] -; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 0 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -93,8 +92,7 @@ ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[RC:%.*]] = icmp sgt i32 [[IV_NEXT]], [[LEN]] -; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 0 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -172,8 +170,7 @@ ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[RC:%.*]] = icmp ugt i32 [[IV_NEXT]], [[LEN]] -; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK-NEXT: br i1 true, label 
[[BACKEDGE]], label [[FAIL:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 1000 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -211,8 +208,7 @@ ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], -1 -; CHECK-NEXT: [[RC:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]] -; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 0 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] Index: llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll +++ llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll @@ -71,8 +71,7 @@ ; CHECK-NEXT: br i1 [[ZERO_COND]], label [[EXIT:%.*]], label [[RANGE_CHECK_BLOCK:%.*]] ; CHECK: range_check_block: ; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], 1 -; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]] -; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, i32* [[P]], i32 [[IV]] ; CHECK-NEXT: [[EL:%.*]] = load i32, i32* [[EL_PTR]], align 4