Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -9100,6 +9100,27 @@
       return true;
   }
 
+  // If we cannot prove strict comparison (e.g. a > b), maybe we can prove
+  // the facts (a >= b && a != b) separately. A typical situation is when the
+  // non-strict comparison is known from ranges and non-equality is known from
+  // dominating predicates.
+  // Maps a strict predicate to its non-strict counterpart; any other predicate
+  // is returned unchanged.
+  auto LoosePredicate = [](ICmpInst::Predicate Pred) {
+    switch (Pred) {
+      case ICmpInst::ICMP_SGT: return ICmpInst::ICMP_SGE;
+      case ICmpInst::ICMP_SLT: return ICmpInst::ICMP_SLE;
+      case ICmpInst::ICMP_UGT: return ICmpInst::ICMP_UGE;
+      case ICmpInst::ICMP_ULT: return ICmpInst::ICMP_ULE;
+      default: return Pred;
+    }
+  };
+  auto NonStrictPredicate = LoosePredicate(Pred);
+  if (Pred != NonStrictPredicate &&
+      isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, LHS, RHS) &&
+      isLoopEntryGuardedByCond(L, NonStrictPredicate, LHS, RHS))
+    return true;
+
   return false;
 }
 
Index: test/Transforms/IRCE/conjunctive-checks.ll
===================================================================
--- test/Transforms/IRCE/conjunctive-checks.ll
+++ test/Transforms/IRCE/conjunctive-checks.ll
@@ -4,10 +4,10 @@
 
 ; CHECK-LABEL: @f_0(
 ; CHECK: loop.preheader:
-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
 ; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
-; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]]
-; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]]
+; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
+; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]]
+; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]]
 ; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]]
 ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
 ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0
Index: test/Transforms/IRCE/decrementing-loop.ll
===================================================================
--- test/Transforms/IRCE/decrementing-loop.ll
+++ test/Transforms/IRCE/decrementing-loop.ll
@@ -1,4 +1,8 @@
-; RUN: opt -verify-loop-info -irce -S < %s | FileCheck %s
+; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s
+
+; CHECK-LABEL: irce: in function decrementing_loop: constrained Loop at depth 1
+; CHECK-LABEL: irce: in function test_01: constrained Loop at depth 1
+; CHECK-LABEL: irce: in function test_02: constrained Loop at depth 1
 
 define void @decrementing_loop(i32 *%arr, i32 *%a_len_ptr, i32 %n) {
  entry:
@@ -38,5 +42,85 @@
 ; CHECK: %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1
 }
 
+; Make sure that we can eliminate the range check when the loop looks like:
+; for (i = len.a - 1; i >= 0; --i)
+;   b[i] = a[i];
+define void @test_01(i32* %a, i32* %b, i32* %a_len_ptr, i32* %b_len_ptr) {
+
+; CHECK-LABEL: test_01
+; CHECK: mainloop:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK: %rc = and i1 true, true
+; CHECK: loop.preloop:
+
+ entry:
+  %len.a = load i32, i32* %a_len_ptr, !range !0
+  %len.b = load i32, i32* %b_len_ptr, !range !0
+  %first.itr.check = icmp ne i32 %len.a, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ %len.a, %entry ] , [ %idx.next, %in.bounds ]
+  %idx.next = sub i32 %idx, 1
+  %rca = icmp ult i32 %idx.next, %len.a
+  %rcb = icmp ult i32 %idx.next, %len.b
+  %rc = and i1 %rca, %rcb
+  br i1 %rc, label %in.bounds, label %out.of.bounds, !prof !1
+
+ in.bounds:
+  %el.a = getelementptr i32, i32* %a, i32 %idx.next
+  %el.b = getelementptr i32, i32* %b, i32 %idx.next
+  %v = load i32, i32* %el.a
+  store i32 %v, i32* %el.b
+  %loop.cond = icmp slt i32 %idx, 2
+  br i1 %loop.cond, label %exit, label %loop
+
+ out.of.bounds:
+  ret void
+
+ exit:
+  ret void
+}
+
+; Same as test_01, but the latch condition is unsigned
+define void @test_02(i32* %a, i32* %b, i32* %a_len_ptr, i32* %b_len_ptr) {
+
+; CHECK-LABEL: test_02
+; CHECK: mainloop:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK: %rc = and i1 true, true
+; CHECK: loop.preloop:
+
+ entry:
+  %len.a = load i32, i32* %a_len_ptr, !range !0
+  %len.b = load i32, i32* %b_len_ptr, !range !0
+  %first.itr.check = icmp ne i32 %len.a, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ %len.a, %entry ] , [ %idx.next, %in.bounds ]
+  %idx.next = sub i32 %idx, 1
+  %rca = icmp ult i32 %idx.next, %len.a
+  %rcb = icmp ult i32 %idx.next, %len.b
+  %rc = and i1 %rca, %rcb
+  br i1 %rc, label %in.bounds, label %out.of.bounds, !prof !1
+
+ in.bounds:
+  %el.a = getelementptr i32, i32* %a, i32 %idx.next
+  %el.b = getelementptr i32, i32* %b, i32 %idx.next
+  %v = load i32, i32* %el.a
+  store i32 %v, i32* %el.b
+  %loop.cond = icmp ult i32 %idx, 2
+  br i1 %loop.cond, label %exit, label %loop
+
+ out.of.bounds:
+  ret void
+
+ exit:
+  ret void
+}
+
 !0 = !{i32 0, i32 2147483647}
 !1 = !{!"branch_weights", i32 64, i32 4}
Index: test/Transforms/IRCE/single-access-no-preloop.ll
===================================================================
--- test/Transforms/IRCE/single-access-no-preloop.ll
+++ test/Transforms/IRCE/single-access-no-preloop.ll
@@ -85,10 +85,10 @@
 
 ; CHECK-LABEL: @single_access_no_preloop_with_offset(
 ; CHECK: loop.preheader:
-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
 ; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
-; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]]
-; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]]
+; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
+; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]]
+; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]]
 ; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]]
 ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
 ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0