diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -89,6 +89,7 @@ #include using namespace llvm; +using namespace PatternMatch; #define DEBUG_TYPE "indvars" @@ -155,6 +156,9 @@ bool rewriteNonIntegerIVs(Loop *L); bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); + /// See if we can convert an exit condition from signed to unsigned. + /// (See inline comment about why this is duplicated from simplifyAndExtend) + bool canonicalizeExitCondition(Loop *L); /// Try to eliminate loop exits based on analyzeable exit counts bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); /// Try to form loop invariant tests for loop exits by changing how many @@ -1346,7 +1350,6 @@ SmallVectorImpl &DeadInsts) { ICmpInst::Predicate Pred; Value *LHS, *RHS; - using namespace PatternMatch; BasicBlock *TrueSucc, *FalseSucc; if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) @@ -1407,6 +1410,55 @@ return true; } +bool IndVarSimplify::canonicalizeExitCondition(Loop *L) { + // Note: This is duplicating a particular part on SimplifyIndVars reasoning. + // We need to duplicate it because given icmp zext(small-iv), C, IVUsers + // never reaches the icmp since the zext doesn't fold to an AddRec unless + // it already has flags. The alternative to this would be to extending the + // set of "interesting" IV users to include the icmp, but doing that + // regresses results in practice by querying SCEVs before trip counts which + // rely on them which results in SCEV caching sub-optimal answers. The + // concern about caching sub-optimal results is why we only query SCEVs of + // the loop invariant RHS here. + + auto *ExitingBB = L->getExitingBlock(); + if (!ExitingBB) + return false; + auto *BI = dyn_cast(ExitingBB->getTerminator()); + if (!BI) + return false; + assert(BI->isConditional() && "exit branch must be conditional"); + + auto *ICmp = dyn_cast(BI->getCondition()); + if (!ICmp) + return false; + + auto *LHS = ICmp->getOperand(0); + auto *RHS = ICmp->getOperand(1); + // Avoid computing SCEVs in the loop to avoid poisoning cache with + // sub-optimal results. + if (!L->isLoopInvariant(RHS)) + return false; + + // Match (icmp signed-cond zext, RHS) + Value *LHSOp = nullptr; + if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned()) + return false; + + const DataLayout &DL = ExitingBB->getModule()->getDataLayout(); + const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType()); + const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType()); + auto FullCR = ConstantRange::getFull(InnerBitWidth); + FullCR = FullCR.zeroExtend(OuterBitWidth); + if (!FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS)))) + return false; + + // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus + // replace the signed condition with the unsigned version. + ICmp->setPredicate(ICmp->getUnsignedPredicate()); + return true; +} + bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -1788,6 +1840,11 @@ // Eliminate redundant IV cycles. NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); + if (canonicalizeExitCondition(L)) + // We've changed the predicate, but have not changed exit counts, or the + // values which can flow through any SCEV. i.e, no invalidation needed. + Changed = true; + // Try to eliminate loop exits based on analyzeable exit counts if (optimizeLoopExits(L, Rewriter)) { Changed = true; diff --git a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll --- a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll +++ b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll @@ -15,7 +15,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[ZEXT]], 254 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -71,7 +71,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[ZEXT]], [[N]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[ZEXT]], [[N]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -301,7 +301,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[ZEXT]], 254 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -355,7 +355,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp sle i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i16 [[ZEXT]], 254 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -409,7 +409,7 @@ ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp sge i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i16 [[ZEXT]], 254 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void