diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1876,6 +1876,11 @@
   bool isKnownPredicateViaConstantRanges(ICmpInst::Predicate Pred,
                                          const SCEV *LHS, const SCEV *RHS);
 
+  /// Test if the given expression is known to satisfy the condition described
+  /// by Pred by decomposing a subtraction.
+  bool isKnownPredicateViaSubIdiom(ICmpInst::Predicate Pred, const SCEV *LHS,
+                                   const SCEV *RHS);
+
   /// Try to prove the condition described by "LHS Pred RHS" by ruling out
   /// integer overflow.
   ///
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1833,6 +1833,16 @@
     }
   }
 
+  // zext (A umax B) --> (zext A) umax (zext B), if one operand is a constant.
+  if (auto *SM = dyn_cast<SCEVUMaxExpr>(Op)) {
+    const SCEV *Op0 = SM->getOperand(0);
+    const SCEV *Op1 = SM->getOperand(1);
+    if (SM->getNumOperands() == 2 &&
+        (isa<SCEVConstant>(Op0) || isa<SCEVConstant>(Op1)))
+      return getUMaxExpr(getZeroExtendExpr(Op0, Ty),
+                         getZeroExtendExpr(Op1, Ty));
+  }
+
   // The cast wasn't folded; create an explicit cast node.
   // Recompute the insert position, as it may have been invalidated.
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
@@ -3104,6 +3114,34 @@
          getEffectiveSCEVType(RHS->getType()) &&
          "SCEVUDivExpr operand types don't match!");
 
+  // Try to shift udiv across an addition even when no wrapping info is
+  // present, using the fact that (B - A) will be in [0, B+1), if
+  // B s>= A and A s>= 0.
+  //
+  // (-C1 + X) /u C2 can be transformed to (X /u C2) - (C1 /u C2), if
+  //   * C1 % C2 == 0
+  //   * X % C2 == 0
+  //   * X s>= C1
+  //   * C1 s>= 0
+  //
+  // If C1 and C2 are constants, at least one of the udiv expressions can be
+  // eliminated. This pattern is commonly created for trip counts
+  // involving pointer IVs, where a multiple of the element width
+  // is subtracted.
+  const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS);
+  const SCEVConstant *C = dyn_cast<SCEVConstant>(RHS);
+  if (Add && C && Add->getNumOperands() == 2) {
+    unsigned MultTrailing = C->getAPInt().countTrailingZeros();
+    auto *NegOp0 = getNegativeSCEV(Add->getOperand(0));
+    if (GetMinTrailingZeros(Add->getOperand(0)) >= MultTrailing &&
+        GetMinTrailingZeros(Add->getOperand(1)) >= MultTrailing &&
+        isKnownNonNegative(NegOp0) &&
+        isKnownPredicate(CmpInst::ICMP_SGE, Add->getOperand(1), NegOp0)) {
+      return getMinusSCEV(getUDivExactExpr(Add->getOperand(1), RHS),
+                          getUDivExactExpr(NegOp0, RHS));
+    }
+  }
+
   FoldingSetNodeID ID;
   ID.AddInteger(scUDivExpr);
   ID.AddPointer(LHS);
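(Editorial sketch, not part of the patch: the udiv fold above rests on a plain
arithmetic identity. The following standalone C++ check uses ordinary unsigned
integers rather than the SCEV API, with arbitrarily chosen bounds, and asserts
the identity exhaustively for C2 == 4.)

#include <cassert>
#include <cstdint>

int main() {
  // If C2 divides both C1 and X, C1 >= 0 and X >= C1, then unsigned division
  // distributes over the subtraction: (X - C1) / C2 == X / C2 - C1 / C2,
  // and neither subtraction wraps.
  const uint64_t C2 = 4;
  for (uint64_t C1 = 0; C1 <= 64; C1 += C2)   // C1 % C2 == 0, C1 s>= 0
    for (uint64_t X = C1; X <= 256; X += C2)  // X % C2 == 0, X s>= C1
      assert((X - C1) / C2 == X / C2 - C1 / C2);
  return 0;
}
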
@@ -9113,7 +9151,6 @@
   // First compute the unsigned distance from zero in the direction of Step.
   bool CountDown = StepC->getAPInt().isNegative();
   const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
-  // Handle unitary steps, which cannot wraparound.
   // 1*N = -Start; -1*N = Start (mod 2^BW), so:
   //   N = Distance (as unsigned)
   if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) {
@@ -10095,7 +10132,10 @@
          "LHS is not available at Loop Entry");
   assert(isAvailableAtLoopEntry(RHS, L) &&
          "RHS is not available at Loop Entry");
-  return isBasicBlockEntryGuardedByCond(L->getHeader(), Pred, LHS, RHS);
+  if (isBasicBlockEntryGuardedByCond(L->getHeader(), Pred, LHS, RHS))
+    return true;
+  return isBasicBlockEntryGuardedByCond(
+      L->getHeader(), Pred, applyLoopGuards(LHS, L), applyLoopGuards(RHS, L));
 }
 
 bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
@@ -10934,10 +10974,42 @@
   return false;
 }
 
+bool ScalarEvolution::isKnownPredicateViaSubIdiom(ICmpInst::Predicate Pred,
+                                                  const SCEV *LHS,
+                                                  const SCEV *RHS) {
+  // Handle Y - X u<= Y, if X s>= 0 and Y u>= X. In that case, the result of
+  // (Y - X) will be in [0, Y+1), i.e. the expression won't wrap in the
+  // unsigned sense. Apply similar reasoning for Y - X u< Y, if X s> 0.
+  auto *Add = dyn_cast<SCEVAddExpr>(LHS);
+  if (Add && Add->getNumOperands() == 2 &&
+      (Pred == CmpInst::ICMP_ULE || Pred == CmpInst::ICMP_ULT)) {
+    auto *X = Add->getOperand(0);
+    auto *Y = Add->getOperand(1);
+    auto Check = [this, Pred, RHS](const SCEV *X, const SCEV *Y) {
+      // The first argument here is the negated X of the idiom. For
+      // Y - X u<= Y, X s>= 0 means the argument must be known non-positive;
+      // for Y - X u< Y, X s> 0 means it must be known negative.
+      bool CheckX = Pred == CmpInst::ICMP_ULE ? isKnownNonPositive(X)
+                                              : isKnownNegative(X);
+      return Y == RHS && CheckX && isKnownNonNegative(Y) &&
+             isKnownPredicateViaConstantRanges(CmpInst::ICMP_UGE, Y,
+                                               getNegativeSCEV(X));
+    };
+
+    // Handle the (-1) * X + Y variant.
+    if (Check(X, Y))
+      return true;
+
+    // Handle the swapped variant Y + (-1) * X.
+    if (Check(Y, X))
+      return true;
+  }
+  return false;
+}
+
 bool ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred,
                                                       const SCEV *LHS,
                                                       const SCEV *RHS) {
   return isKnownPredicateExtendIdiom(Pred, LHS, RHS) ||
+         isKnownPredicateViaSubIdiom(Pred, LHS, RHS) ||
          isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
          IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
          IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
@@ -11223,12 +11295,13 @@
     return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount,
                      false /*MaxOrZero*/, Predicates);
   }
+
   // If the backedge is taken at least once, then it will be taken
   // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start
   // is the LHS value of the less-than comparison the first time it is evaluated
   // and End is the RHS.
   const SCEV *BECountIfBackedgeTaken =
-    computeBECount(getMinusSCEV(End, Start), Stride, false);
+      computeBECount(getMinusSCEV(End, Start), Stride, false);
   // If the loop entry is guarded by the result of the backedge test of the
   // first loop iteration, then we know the backedge will be taken at least
   // once and so the backedge taken count is as above. If not then we use the
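(Editorial sketch, not part of the patch: the subtraction idiom behind
isKnownPredicateViaSubIdiom can be verified exhaustively at a small bit width.
Plain C++ again, not the SCEV API.)

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned Y = 0; Y <= 255; ++Y) {
    for (unsigned X = 0; X <= 127; ++X) { // X s>= 0 as a signed 8-bit value
      if (Y < X)                          // require Y u>= X
        continue;
      uint8_t Sub = uint8_t(Y - X);       // Y - X cannot wrap: Y u>= X
      assert(Sub <= Y);                   // Y - X u<= Y
      if (X > 0)
        assert(Sub < Y);                  // Y - X u< Y when X s> 0
    }
  }
  return 0;
}
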
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1521,8 +1521,8 @@
 
     // Can we prove that some other exit must be taken strictly before this
     // one?
-    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
-                                     MaxExitCount, ExitCount)) {
+    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxExitCount,
+                                     ExitCount)) {
       foldExit(L, ExitingBB, false, DeadInsts);
       Changed = true;
       continue;
diff --git a/llvm/test/Transforms/IndVarSimplify/simplify-pointer-arithmetic.ll b/llvm/test/Transforms/IndVarSimplify/simplify-pointer-arithmetic.ll
--- a/llvm/test/Transforms/IndVarSimplify/simplify-pointer-arithmetic.ll
+++ b/llvm/test/Transforms/IndVarSimplify/simplify-pointer-arithmetic.ll
@@ -20,10 +20,7 @@
 ; CHECK-NEXT:    ret i1 false
 ; CHECK:       header:
 ; CHECK-NEXT:    [[P:%.*]] = phi i32* [ [[P_INC:%.*]], [[LATCH:%.*]] ], [ [[P_BASE]], [[HEADER_PREHEADER]] ]
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_INC:%.*]], [[LATCH]] ], [ 0, [[HEADER_PREHEADER]] ]
-; CHECK-NEXT:    [[I_INC]] = add nuw nsw i64 [[I]], 1
-; CHECK-NEXT:    [[I_ULT_EXT:%.*]] = icmp ult i64 [[I]], [[EXT]]
-; CHECK-NEXT:    br i1 [[I_ULT_EXT]], label [[LATCH]], label [[TRAP_LOOPEXIT:%.*]]
+; CHECK-NEXT:    br i1 true, label [[LATCH]], label [[TRAP_LOOPEXIT:%.*]]
 ; CHECK:       latch:
 ; CHECK-NEXT:    [[P_INC]] = getelementptr inbounds i32, i32* [[P]], i64 1
 ; CHECK-NEXT:    [[C:%.*]] = icmp ne i32* [[P_INC]], [[P_END]]
@@ -179,10 +176,7 @@
 ; CHECK-NEXT:    ret i1 false
 ; CHECK:       header:
 ; CHECK-NEXT:    [[P:%.*]] = phi i32* [ [[P_INC:%.*]], [[LATCH:%.*]] ], [ [[P_BASE]], [[HEADER_PREHEADER]] ]
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_INC:%.*]], [[LATCH]] ], [ 1, [[HEADER_PREHEADER]] ]
-; CHECK-NEXT:    [[I_INC]] = add nuw nsw i64 [[I]], 1
-; CHECK-NEXT:    [[I_ULT_EXT:%.*]] = icmp ule i64 [[I]], [[EXT]]
-; CHECK-NEXT:    br i1 [[I_ULT_EXT]], label [[LATCH]], label [[TRAP_LOOPEXIT:%.*]]
+; CHECK-NEXT:    br i1 true, label [[LATCH]], label [[TRAP_LOOPEXIT:%.*]]
 ; CHECK:       latch:
 ; CHECK-NEXT:    [[P_INC]] = getelementptr inbounds i32, i32* [[P]], i64 1
 ; CHECK-NEXT:    [[C:%.*]] = icmp ne i32* [[P_INC]], [[P_END]]
@@ -234,10 +228,7 @@
 ; CHECK-NEXT:    ret i1 false
 ; CHECK:       header:
 ; CHECK-NEXT:    [[P:%.*]] = phi i32* [ [[P_INC:%.*]], [[LATCH:%.*]] ], [ [[P_BASE]], [[HEADER_PREHEADER]] ]
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_INC:%.*]], [[LATCH]] ], [ 0, [[HEADER_PREHEADER]] ]
-; CHECK-NEXT:    [[I_INC]] = add nuw nsw i64 [[I]], 1
-; CHECK-NEXT:    [[I_UGE_EXT:%.*]] = icmp uge i64 [[I]], [[EXT]]
-; CHECK-NEXT:    br i1 [[I_UGE_EXT]], label [[TRAP_LOOPEXIT:%.*]], label [[LATCH]]
+; CHECK-NEXT:    br i1 false, label [[TRAP_LOOPEXIT:%.*]], label [[LATCH]]
 ; CHECK:       latch:
 ; CHECK-NEXT:    [[P_INC]] = getelementptr inbounds i32, i32* [[P]], i64 1
 ; CHECK-NEXT:    [[C:%.*]] = icmp ne i32* [[P_INC]], [[P_END]]
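(Editorial sketch, not part of the patch: a hypothetical C++ source loop of the
shape the updated tests cover. The function name and body are invented for
illustration; the point is that the widened counter I is always u< Ext inside
the loop, so the early exit corresponds to the constant branch the new CHECK
lines expect.)

#include <cstdint>

// A pointer IV walks [Base, End) while a widened counter I tracks the
// iteration number. Since I stays u< Ext for every iteration, the early-exit
// test below matches the 'br i1 false' / 'br i1 true' folds in the test.
bool allNonZero(const int *Base, uint32_t N) {
  const int *End = Base + N;
  uint64_t Ext = N;  // zero-extended trip count, like [[EXT]] in the test
  uint64_t I = 0;
  for (const int *P = Base; P != End; ++P, ++I) {
    if (I >= Ext)    // provably never taken; a foldable exit
      return false;  // stands in for the test's trap exit
    if (*P == 0)
      return false;
  }
  return true;
}

int main() {
  int A[4] = {1, 2, 3, 4};
  return allNonZero(A, 4) ? 0 : 1;
}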