Index: llvm/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/include/llvm/Analysis/ScalarEvolution.h +++ llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1162,6 +1162,18 @@ /// Try to apply information from loop guards for \p L to \p Expr. const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L); + /// Return true if the loop has no abnormal exits. That is, if the loop + /// is not infinite, it must exit through an explicit edge in the CFG. + /// (As opposed to either a) throwing out of the function or b) entering a + /// well defined infinite loop in some callee.) + bool loopHasNoAbnormalExits(const Loop *L) { + return getLoopProperties(L).HasNoAbnormalExits; + } + + /// Return true if this loop is finite by assumption. That is, + /// to be infinite, it must also be undefined. + bool loopIsFiniteByAssumption(const Loop *L); + private: /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a /// Value is deleted. @@ -1482,14 +1494,6 @@ return getLoopProperties(L).HasNoSideEffects; } - bool loopHasNoAbnormalExits(const Loop *L) { - return getLoopProperties(L).HasNoAbnormalExits; - } - - /// Return true if this loop is finite by assumption. That is, - /// to be infinite, it must also be undefined. - bool loopIsFiniteByAssumption(const Loop *L); - /// Compute a LoopDisposition value. LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L); Index: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -36,6 +36,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemorySSA.h" @@ -155,6 +156,9 @@ bool rewriteNonIntegerIVs(Loop *L); bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); + /// Use knowledge about loop finiteness to convert signed to unsigned + /// exit conditions where possible. + bool canonicalizeExitCondition(Loop *L); /// Try to eliminate loop exits based on analyzeable exit counts bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); /// Try to form loop invariant tests for loop exits by changing how many @@ -1407,6 +1411,55 @@ return true; } +bool IndVarSimplify::canonicalizeExitCondition(Loop *L) { + // Note: Conceptually, this should probably live in simplifyAndExtend with + // the exit being identified with a bounded use walk, but doing that requires + // extending the set of "interesting" IV users to include the icmp, and + // doing that regresses results in practice by querying SCEVs before trip + // counts which rely on them which results in SCEV caching sub-optimal + // answers. + + auto *ExitingBB = L->getExitingBlock(); + if (!ExitingBB) + return false; + auto *BI = dyn_cast(ExitingBB->getTerminator()); + if (!BI || BI->isUnconditional()) + return false;; + + auto *ICmp = dyn_cast(BI->getCondition()); + if (!ICmp) + return false; + + auto *LHS = dyn_cast(ICmp->getOperand(0)); + auto *RHS = ICmp->getOperand(1); + if (!LHS || !L->isLoopInvariant(RHS)) + return false; + + // Match (icmp signed-cond zext, RHS) + switch (LHS->getOpcode()) { + default: + return false; + case Instruction::ZExt: + switch (ICmp->getPredicate()) { + default: + return false; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + break; + }; + }; + + // If the exit might not be taken in a well defined program. + if (!SE->loopHasNoAbnormalExits(L) || !SE->loopIsFiniteByAssumption(L)) + return false; + // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus + // replace the signed condition with the unsigned version. + ICmp->setPredicate(ICmp->getUnsignedPredicate()); + return true; +} + bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -1788,6 +1841,14 @@ // Eliminate redundant IV cycles. NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); + if (canonicalizeExitCondition(L)) { + Changed = true; + // Given we've changed exit counts, notify SCEV + // Some nested loops may share same folded exit basic block, + // thus we need to notify top most loop. + SE->forgetTopmostLoop(L); + } + // Try to eliminate loop exits based on analyzeable exit counts if (optimizeLoopExits(L, Rewriter)) { Changed = true; Index: llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll +++ llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll @@ -15,7 +15,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[ZEXT]], 254 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -42,7 +42,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[ZEXT]], [[N:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[ZEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -301,7 +301,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[ZEXT]], 254 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -328,7 +328,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[ZEXT]], [[N:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[ZEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -355,7 +355,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp sle i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i16 [[ZEXT]], 254 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -382,7 +382,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp sle i16 [[ZEXT]], [[N:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i16 [[ZEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -409,7 +409,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp sge i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i16 [[ZEXT]], 254 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -436,7 +436,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp sge i16 [[ZEXT]], [[N:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i16 [[ZEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void