diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -13652,11 +13652,55 @@
       }
     }
 
-    if (!isa<SCEVUnknown>(LHS)) {
+    if (!isa<SCEVUnknown>(LHS) && isa<SCEVUnknown>(RHS)) {
       std::swap(LHS, RHS);
       Predicate = CmpInst::getSwappedPredicate(Predicate);
     }
 
+    // Check for a condition of the form (C1 + X) pred C2, where C1 and C2 are
+    // constants and X is an unknown. InstCombine creates this form when it
+    // folds a lower-bound and an upper-bound check on X into one compare,
+    // e.g. (X - 2) u< 20 for 2 u<= X u< 22. On a match, X is mapped to
+    // umax(Lower, umin(X, Upper)) in RewriteMap and true is returned;
+    // otherwise RewriteMap is left untouched and false is returned.
+    auto MatchRangeCheckIdiom = [this, Predicate, LHS, RHS, &RewriteMap]() {
+      auto *AddExpr = dyn_cast<SCEVAddExpr>(LHS);
+      if (!AddExpr || AddExpr->getNumOperands() != 2)
+        return false;
+
+      auto *C1 = dyn_cast<SCEVConstant>(AddExpr->getOperand(0));
+      auto *LHSUnknown = dyn_cast<SCEVUnknown>(AddExpr->getOperand(1));
+      auto *C2 = dyn_cast<SCEVConstant>(RHS);
+      if (!C1 || !C2 || !LHSUnknown)
+        return false;
+
+      // Solve the compare for (C1 + X), then shift the solution by -C1 to get
+      // the set of X values for which the condition holds.
+      auto ExactRegion =
+          ConstantRange::makeExactICmpRegion(Predicate, C2->getAPInt())
+              .sub(C1->getAPInt());
+
+      // Bail out, unless we have a non-wrapping, monotonic range.
+      if (ExactRegion.isWrappedSet() || ExactRegion.isFullSet() ||
+          ExactRegion.getLower().ugt(ExactRegion.getUpper()))
+        return false;
+
+      // ExactRegion bounds X and the entry is keyed by X's value, so the
+      // expression being clamped must be X (or an earlier rewrite of X), not
+      // the matched (C1 + X), which differs from X by C1.
+      auto I = RewriteMap.find(LHSUnknown->getValue());
+      const SCEV *RewrittenLHS =
+          I != RewriteMap.end() ? I->second : LHSUnknown;
+      // NOTE(review): getUpper() is presumably the exclusive end of the
+      // region, which would make the umin bound conservative by one.
+      RewriteMap[LHSUnknown->getValue()] = getUMaxExpr(
+          getConstant(ExactRegion.getLower()),
+          getUMinExpr(RewrittenLHS, getConstant(ExactRegion.getUpper())));
+      return true;
+    };
+    if (MatchRangeCheckIdiom())
+      return;
+
     // For now, limit to conditions that provide information about unknown
     // expressions. RHS also cannot contain add recurrences.
auto *LHSUnknown = dyn_cast(LHS); diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -1260,14 +1260,14 @@ ; CHECK-NEXT: %N.off = add i32 %N, -1 ; CHECK-NEXT: --> (-1 + %N) U: full-set S: full-set ; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + %N) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,8) S: [0,8) Exits: (-1 + %N) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i16, i16* %pred, i32 %iv ; CHECK-NEXT: --> {%pred,+,2}<%loop> U: full-set S: full-set Exits: ((2 * (zext i32 (-1 + %N) to i64)) + %pred) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,-2147483648) S: [1,-2147483648) Exits: %N LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,9) S: [1,9) Exits: %N LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @optimized_range_check_unsigned ; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %N) -; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 +; CHECK-NEXT: Loop %loop: max backedge-taken count is 7 ; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + %N) ; CHECK-NEXT: Predicates: ; CHECK: Loop %loop: Trip multiple is 1 @@ -1289,6 +1289,44 @@ ret void } +; The function below uses a single condition to ensure %N >= 2 && %N < 22. +; InstCombine transforms such checks with 2 conditions to a single check as in +; the test function. 
+define void @optimized_range_check_unsigned2(i16* %pred, i32 %N) {
+; CHECK-LABEL: 'optimized_range_check_unsigned2'
+; CHECK-NEXT: Classifying expressions for: @optimized_range_check_unsigned2
+; CHECK-NEXT: %N.off = add i32 %N, -2
+; CHECK-NEXT: --> (-2 + %N) U: full-set S: full-set
+; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,22) S: [0,22) Exits: (-1 + %N) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: %gep = getelementptr inbounds i16, i16* %pred, i32 %iv
+; CHECK-NEXT: --> {%pred,+,2}<%loop> U: full-set S: full-set Exits: ((2 * (zext i32 (-1 + %N) to i64)) + %pred) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, 1
+; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,23) S: [1,23) Exits: %N LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: Determining loop execution counts for: @optimized_range_check_unsigned2
+; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %N)
+; CHECK-NEXT: Loop %loop: max backedge-taken count is 21
+; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + %N)
+; CHECK-NEXT: Predicates:
+; CHECK: Loop %loop: Trip multiple is 1
+;
+entry:
+  %N.off = add i32 %N, -2
+  %cmp = icmp ult i32 %N.off, 20 ; unsigned range check: true iff 2 <= %N < 22
+  br i1 %cmp, label %loop, label %exit ; the loop is entered only when the range check holds
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep = getelementptr inbounds i16, i16* %pred, i32 %iv
+  store i16 0, i16* %gep, align 2
+  %iv.next = add nuw nsw i32 %iv, 1
+  %ec = icmp eq i32 %iv.next, %N ; exit once %iv.next reaches %N
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
 ; Similar to @optimized_range_check_unsigned, but the initial compare checks
 ; against unsigned max (-1), which breaks the range check idiom.
define void @not_optimized_range_check_unsigned1(i16* %pred, i32 %N) { @@ -1344,7 +1382,7 @@ ; CHECK-NEXT: Loop %loop: max backedge-taken count is -2 ; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + %N) ; CHECK-NEXT: Predicates: -; CHECK: Loop %loop: Trip multiple is 1 +; CHECK: Loop %loop: Trip multiple is 2147483648 ; entry: %N.off = add i32 %N, -1