Index: llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp @@ -155,7 +155,7 @@ // When S = -1 (i.e. reverse iterating loop), the transformation is supported // when: // * The loop has a single latch with the condition of the form: -// B(X) = X latchLimit, where is u> or s>. +// B(X) = X latchLimit, where is u>, u>=, s>, or s>=. // * The guard condition is of the form // G(X) = X - 1 u< guardLimit // @@ -171,6 +171,10 @@ // guardStart u< guardLimit && latchLimit u>= 1. // Similarly for sgt condition the widened condition is: // guardStart u< guardLimit && latchLimit s>= 1. +// For uge condition the widened condition is: +// guardStart u< guardLimit && latchLimit u> 1. +// For sge condition the widened condition is: +// guardStart u< guardLimit && latchLimit s> 1. //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopPredication.h" @@ -485,9 +489,7 @@ // latchLimit 1. // See the header comment for reasoning of the checks. Instruction *InsertAt = Preheader->getTerminator(); - auto LimitCheckPred = ICmpInst::isSigned(LatchCheck.Pred) - ? ICmpInst::ICMP_SGE - : ICmpInst::ICMP_UGE; + auto LimitCheckPred = getLatchPredicateForGuard(LatchCheck.Pred); auto *FirstIterationCheck = expandCheck(Expander, Builder, ICmpInst::ICMP_ULT, GuardStart, GuardLimit, InsertAt); auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred, LatchLimit, @@ -671,7 +673,8 @@ Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE; } else { assert(Step->isAllOnesValue() && "Step should be -1!"); - return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT; + return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT && + Pred != ICmpInst::ICMP_UGE && Pred != ICmpInst::ICMP_SGE; } }; Index: llvm/trunk/test/Transforms/LoopPredication/reverse.ll =================================================================== --- llvm/trunk/test/Transforms/LoopPredication/reverse.ll +++ llvm/trunk/test/Transforms/LoopPredication/reverse.ll @@ -138,3 +138,108 @@ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } + +define i32 @signed_reverse_loop_n_to_lower_limit_equal(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) { +; CHECK-LABEL: @signed_reverse_loop_n_to_lower_limit_equal( +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +; CHECK: loop.preheader: +; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1 +; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length +; CHECK-NEXT: [[no_wrap_check:%.*]] = icmp sgt i32 %lowerlimit, 1 +; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], [[no_wrap_check]] +loop.preheader: + br label %loop + +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +loop: + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ] + %i.next = add nsw i32 %i, -1 + %within.bounds = icmp ult i32 %i.next, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + %i.i64 = zext i32 %i.next to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %continue = icmp sge i32 %i, %lowerlimit + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @unsigned_reverse_loop_n_to_lower_limit_equal(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) { +; CHECK-LABEL: @unsigned_reverse_loop_n_to_lower_limit_equal( +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +; CHECK: loop.preheader: +; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1 +; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length +; CHECK-NEXT: [[no_wrap_check:%.*]] = icmp ugt i32 %lowerlimit, 1 +; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], [[no_wrap_check]] +loop.preheader: + br label %loop + +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +loop: + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ] + %i.next = add nsw i32 %i, -1 + %within.bounds = icmp ult i32 %i.next, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + %i.i64 = zext i32 %i.next to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %continue = icmp uge i32 %i, %lowerlimit + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + + +; if we predicated the loop, the guard will definitely fail and we will +; deoptimize early on. +define i32 @unsigned_reverse_loop_n_to_1(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) { +; CHECK-LABEL: @unsigned_reverse_loop_n_to_1( +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +; CHECK: loop.preheader: +; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1 +; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length +; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], false +loop.preheader: + br label %loop + +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +loop: + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ] + %i.next = add nsw i32 %i, -1 + %within.bounds = icmp ult i32 %i.next, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + %i.i64 = zext i32 %i.next to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %continue = icmp uge i32 %i, 1 + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} +