Index: llvm/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/include/llvm/Analysis/ScalarEvolution.h +++ llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1162,6 +1162,18 @@ /// Try to apply information from loop guards for \p L to \p Expr. const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L); + /// Return true if the loop has no abnormal exits. That is, if the loop + /// is not infinite, it must exit through an explicit edge in the CFG. + /// (As opposed to either a) throwing out of the function or b) entering a + /// well defined infinite loop in some callee.) + bool loopHasNoAbnormalExits(const Loop *L) { + return getLoopProperties(L).HasNoAbnormalExits; + } + + /// Return true if this loop is finite by assumption. That is, + /// to be infinite, it must also be undefined. + bool loopIsFiniteByAssumption(const Loop *L); + private: /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a /// Value is deleted. @@ -1482,14 +1494,6 @@ return getLoopProperties(L).HasNoSideEffects; } - bool loopHasNoAbnormalExits(const Loop *L) { - return getLoopProperties(L).HasNoAbnormalExits; - } - - /// Return true if this loop is finite by assumption. That is, - /// to be infinite, it must also be undefined. - bool loopIsFiniteByAssumption(const Loop *L); - /// Compute a LoopDisposition value. LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L); Index: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1435,8 +1435,9 @@ auto *LHS = ICmp->getOperand(0); auto *RHS = ICmp->getOperand(1); - // Avoid computing SCEVs in the loop to avoid poisoning cache with - // sub-optimal results. + // For the range reasoning, avoid computing SCEVs in the loop to avoid + // poisoning cache with sub-optimal results. For the must-execute case, + // this is a neccessary precondition for correctness. if (!L->isLoopInvariant(RHS)) continue; @@ -1450,13 +1451,29 @@ const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType()); auto FullCR = ConstantRange::getFull(InnerBitWidth); FullCR = FullCR.zeroExtend(OuterBitWidth); - if (!FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS)))) + if (FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS)))) { + // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus + // replace the signed condition with the unsigned version. + ICmp->setPredicate(ICmp->getUnsignedPredicate()); + Changed = true; continue; + } - // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus - // replace the signed condition with the unsigned version. - ICmp->setPredicate(ICmp->getUnsignedPredicate()); - Changed = true; + // If we have a loop which would be undefined if infinite, and it has at + // most one possible dynamic exit, then we can conclude that exit must + // be taken. If that exit must be taken, and we know the LHS can only + // take values in the positive domain, then we can conclude RHS must + // also be in that same range, and replace a signed compare with an + // unsigned one. + // If the exit might not be taken in a well defined program. + if (ExitingBlocks.size() == 1 && SE->loopHasNoAbnormalExits(L) && + SE->loopIsFiniteByAssumption(L)) { + // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus + // replace the signed condition with the unsigned version. + ICmp->setPredicate(ICmp->getUnsignedPredicate()); + Changed = true; + continue; + } } return Changed; } @@ -1842,10 +1859,13 @@ // Eliminate redundant IV cycles. NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); - if (canonicalizeExitCondition(L)) - // We've changed the predicate, but have not changed exit counts, or the - // values which can flow through any SCEV. i.e, no invalidation needed. + if (canonicalizeExitCondition(L)) { Changed = true; + // Given we've changed exit counts, notify SCEV + // Some nested loops may share same folded exit basic block, + // thus we need to notify top most loop. + SE->forgetTopmostLoop(L); + } // Try to eliminate loop exits based on analyzeable exit counts if (optimizeLoopExits(L, Rewriter)) { Index: llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll +++ llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll @@ -105,7 +105,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[ZEXT]], [[N:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[ZEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -391,7 +391,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[ZEXT]], [[N:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[ZEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -445,7 +445,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp sle i16 [[ZEXT]], [[N:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i16 [[ZEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -499,7 +499,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp sge i16 [[ZEXT]], [[N:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i16 [[ZEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void