Index: include/llvm/IR/InstrTypes.h =================================================================== --- include/llvm/IR/InstrTypes.h +++ include/llvm/IR/InstrTypes.h @@ -973,6 +973,19 @@ /// @brief Return the predicate as if the operands were swapped. static Predicate getSwappedPredicate(Predicate pred); + /// For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE. + /// @brief Returns the non-strict version of strict comparisons. + Predicate getNonStrictPredicate() const { + return getNonStrictPredicate(getPredicate()); + } + + /// This is a static version that you can use without an instruction + /// available. + /// @returns the non-strict version of comparison provided in \p pred. + /// If \p pred is not a strict comparison predicate, returns \p pred. + /// @brief Returns the non-strict version of strict comparisons. + static Predicate getNonStrictPredicate(Predicate pred); + /// @brief Provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -9065,6 +9065,59 @@ if (isKnownPredicateViaConstantRanges(Pred, LHS, RHS)) return true; + // If we cannot prove strict comparison (e.g. a > b), maybe we can prove + // the facts (a >= b && a != b) separately. A typical situation is when the + // non-strict comparison is known from ranges and non-equality is known from + // dominating predicates. If we are proving strict comparison, we always try + // to prove non-equality and non-strict comparison separately. 
+ auto NonStrictPredicate = ICmpInst::getNonStrictPredicate(Pred); + const bool ProvingStrictComparison = (Pred != NonStrictPredicate); + bool ProvedNonStrictComparison = false; + bool ProvedNonEquality = false; + // If ranges alone prove both (a >= b) and (a != b), Pred holds: we are done. + if (ProvingStrictComparison) { + ProvedNonStrictComparison = + isKnownPredicateViaConstantRanges(NonStrictPredicate, LHS, RHS); + ProvedNonEquality = + isKnownPredicateViaConstantRanges(ICmpInst::ICMP_NE, LHS, RHS); + if (ProvedNonStrictComparison && ProvedNonEquality) + return true; + } + + // Try to prove (Pred, LHS, RHS) using isImpliedViaGuard. + auto ProveViaGuard = [&](BasicBlock *Block) { + if (isImpliedViaGuard(Block, Pred, LHS, RHS)) + return true; + if (ProvingStrictComparison) { + if (!ProvedNonStrictComparison) + ProvedNonStrictComparison = + isImpliedViaGuard(Block, NonStrictPredicate, LHS, RHS); + if (!ProvedNonEquality) + ProvedNonEquality = + isImpliedViaGuard(Block, ICmpInst::ICMP_NE, LHS, RHS); + if (ProvedNonStrictComparison && ProvedNonEquality) + return true; + } + return false; + }; + + // Try to prove (Pred, LHS, RHS) using isImpliedCond. + auto ProveViaCond = [&](Value *Condition, bool Inverse) { + if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse)) + return true; + if (ProvingStrictComparison) { + if (!ProvedNonStrictComparison) + ProvedNonStrictComparison = + isImpliedCond(NonStrictPredicate, LHS, RHS, Condition, Inverse); + if (!ProvedNonEquality) + ProvedNonEquality = + isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, Condition, Inverse); + if (ProvedNonStrictComparison && ProvedNonEquality) + return true; + } + return false; + }; + // Starting at the loop predecessor, climb up the predecessor chain, as long // as there are predecessors that can be found that have unique successors // leading to the original header. 
@@ -9073,7 +9126,7 @@ Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { - if (isImpliedViaGuard(Pair.first, Pred, LHS, RHS)) + if (ProveViaGuard(Pair.first)) return true; BranchInst *LoopEntryPredicate = @@ -9082,9 +9135,8 @@ LoopEntryPredicate->isUnconditional()) continue; - if (isImpliedCond(Pred, LHS, RHS, - LoopEntryPredicate->getCondition(), - LoopEntryPredicate->getSuccessor(0) != Pair.second)) + if (ProveViaCond(LoopEntryPredicate->getCondition(), + LoopEntryPredicate->getSuccessor(0) != Pair.second)) return true; } @@ -9096,7 +9148,7 @@ if (!DT.dominates(CI, L->getHeader())) continue; - if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) + if (ProveViaCond(CI->getArgOperand(0), false)) return true; } Index: lib/IR/Instructions.cpp =================================================================== --- lib/IR/Instructions.cpp +++ lib/IR/Instructions.cpp @@ -3467,6 +3467,20 @@ } } +CmpInst::Predicate CmpInst::getNonStrictPredicate(Predicate pred) { + switch (pred) { + case ICMP_SGT: return ICMP_SGE; + case ICMP_SLT: return ICMP_SLE; + case ICMP_UGT: return ICMP_UGE; + case ICMP_ULT: return ICMP_ULE; + case FCMP_OGT: return FCMP_OGE; + case FCMP_OLT: return FCMP_OLE; + case FCMP_UGT: return FCMP_UGE; + case FCMP_ULT: return FCMP_ULE; + default: return pred; + } +} + CmpInst::Predicate CmpInst::getSignedPredicate(Predicate pred) { assert(CmpInst::isUnsigned(pred) && "Call only with signed predicates!"); Index: test/Transforms/IRCE/conjunctive-checks.ll =================================================================== --- test/Transforms/IRCE/conjunctive-checks.ll +++ test/Transforms/IRCE/conjunctive-checks.ll @@ -4,10 +4,10 @@ ; CHECK-LABEL: @f_0( ; CHECK: loop.preheader: -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n ; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len -; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] -; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = 
select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] +; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]] +; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]] ; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 Index: test/Transforms/IRCE/decrementing-loop.ll =================================================================== --- test/Transforms/IRCE/decrementing-loop.ll +++ test/Transforms/IRCE/decrementing-loop.ll @@ -1,4 +1,8 @@ -; RUN: opt -verify-loop-info -irce -S < %s | FileCheck %s +; RUN: opt -verify-loop-info -irce -debug-only=irce -S < %s 2>&1 | FileCheck %s + +; CHECK-LABEL: irce: in function decrementing_loop: constrained Loop at depth 1 +; CHECK-LABEL: irce: in function test_01: constrained Loop at depth 1 +; CHECK-LABEL: irce: in function test_02: constrained Loop at depth 1 define void @decrementing_loop(i32 *%arr, i32 *%a_len_ptr, i32 %n) { entry: @@ -38,5 +42,85 @@ ; CHECK: %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1 } +; Make sure that we can eliminate the range check when the loop looks like: +; for (i = len.a - 1; i >= 0; --i) +; b[i] = a[i]; +define void @test_01(i32* %a, i32* %b, i32* %a_len_ptr, i32* %b_len_ptr) { + +; CHECK-LABEL: test_01 +; CHECK: mainloop: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK: %rc = and i1 true, true +; CHECK: loop.preloop: + + entry: + %len.a = load i32, i32* %a_len_ptr, !range !0 + %len.b = load i32, i32* %b_len_ptr, !range !0 + %first.itr.check = icmp ne i32 %len.a, 0 + br i1 %first.itr.check, label %loop, 
label %exit + + loop: + %idx = phi i32 [ %len.a, %entry ] , [ %idx.next, %in.bounds ] + %idx.next = sub i32 %idx, 1 + %rca = icmp ult i32 %idx.next, %len.a + %rcb = icmp ult i32 %idx.next, %len.b + %rc = and i1 %rca, %rcb + br i1 %rc, label %in.bounds, label %out.of.bounds, !prof !1 + + in.bounds: + %el.a = getelementptr i32, i32* %a, i32 %idx.next + %el.b = getelementptr i32, i32* %b, i32 %idx.next + %v = load i32, i32* %el.a + store i32 %v, i32* %el.b + %loop.cond = icmp slt i32 %idx, 2 + br i1 %loop.cond, label %exit, label %loop + + out.of.bounds: + ret void + + exit: + ret void +} + +; Same as test_01, but the latch condition is unsigned +define void @test_02(i32* %a, i32* %b, i32* %a_len_ptr, i32* %b_len_ptr) { + +; CHECK-LABEL: test_02 +; CHECK: mainloop: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK: %rc = and i1 true, true +; CHECK: loop.preloop: + + entry: + %len.a = load i32, i32* %a_len_ptr, !range !0 + %len.b = load i32, i32* %b_len_ptr, !range !0 + %first.itr.check = icmp ne i32 %len.a, 0 + br i1 %first.itr.check, label %loop, label %exit + + loop: + %idx = phi i32 [ %len.a, %entry ] , [ %idx.next, %in.bounds ] + %idx.next = sub i32 %idx, 1 + %rca = icmp ult i32 %idx.next, %len.a + %rcb = icmp ult i32 %idx.next, %len.b + %rc = and i1 %rca, %rcb + br i1 %rc, label %in.bounds, label %out.of.bounds, !prof !1 + + in.bounds: + %el.a = getelementptr i32, i32* %a, i32 %idx.next + %el.b = getelementptr i32, i32* %b, i32 %idx.next + %v = load i32, i32* %el.a + store i32 %v, i32* %el.b + %loop.cond = icmp ult i32 %idx, 2 + br i1 %loop.cond, label %exit, label %loop + + out.of.bounds: + ret void + + exit: + ret void +} + !0 = !{i32 0, i32 2147483647} !1 = !{!"branch_weights", i32 64, i32 4} Index: test/Transforms/IRCE/single-access-no-preloop.ll =================================================================== --- test/Transforms/IRCE/single-access-no-preloop.ll +++ test/Transforms/IRCE/single-access-no-preloop.ll @@ -85,10 +85,10 @@ ; CHECK-LABEL: 
@single_access_no_preloop_with_offset( ; CHECK: loop.preheader: -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n ; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len -; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] -; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] +; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]] +; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]] ; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 Index: test/Transforms/IndVarSimplify/loop-invariant-conditions.ll =================================================================== --- test/Transforms/IndVarSimplify/loop-invariant-conditions.ll +++ test/Transforms/IndVarSimplify/loop-invariant-conditions.ll @@ -325,6 +325,36 @@ ret void } +; check that we can figure out that iv.next > 1 from the facts that iv >= 0 and +; iv.start != 0. 
+define void @test11(i64* %inc_ptr) { +; CHECK-LABEL: @test11 +entry: + %inc = load i64, i64* %inc_ptr, !range !0 + %ne.cond = icmp ne i64 %inc, 0 + br i1 %ne.cond, label %loop, label %exit + +loop: + %iv = phi i64 [ %inc, %entry ], [ %iv.next, %backedge ] + %iv.next = add i64 %iv, 1 + %brcond = icmp sgt i64 %iv.next, 1 + ; CHECK: br i1 true, label %if.true, label %if.false + br i1 %brcond, label %if.true, label %if.false + +if.true: + br label %backedge + +if.false: + br label %backedge + +backedge: + %loopcond = icmp slt i64 %iv, 200 + br i1 %loopcond, label %loop, label %exit + +exit: + ret void +} + !1 = !{i64 -1, i64 100}