Index: lib/Transforms/Scalar/LoopPredication.cpp =================================================================== --- lib/Transforms/Scalar/LoopPredication.cpp +++ lib/Transforms/Scalar/LoopPredication.cpp @@ -155,7 +155,7 @@ // When S = -1 (i.e. reverse iterating loop), the transformation is supported // when: // * The loop has a single latch with the condition of the form: -// B(X) = X latchLimit, where is u> or s>. +// B(X) = X latchLimit, where is u>, u>=, s>, or s>=. // * The guard condition is of the form // G(X) = X - 1 u< guardLimit // @@ -171,6 +171,10 @@ // guardStart u< guardLimit && latchLimit u>= 1. // Similarly for sgt condition the widened condition is: // guardStart u< guardLimit && latchLimit s>= 1. +// For uge condition the widened condition is: +// guardStart u< guardLimit && latchLimit u> 1. +// For sge condition the widened condition is: +// guardStart u< guardLimit && latchLimit s> 1. //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopPredication.h" @@ -266,6 +270,11 @@ // Return the loopLatchCheck corresponding to the RangeCheckType if safe to do // so. Optional generateLoopLatchCheck(Type *RangeCheckType); + + // Returns the latch predicate for guard. SGT -> SGE, UGT -> UGE, SGE -> SGT, + // UGE -> UGT, etc. + ICmpInst::Predicate getLatchPredicateForGuard(ICmpInst::Predicate Pred); + public: LoopPredication(ScalarEvolution *SE) : SE(SE){}; bool runOnLoop(Loop *L); @@ -391,6 +400,30 @@ return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE); } +ICmpInst::Predicate +LoopPredication::getLatchPredicateForGuard(ICmpInst::Predicate Pred) { + switch (LatchCheck.Pred) { + case ICmpInst::ICMP_ULT: + return ICmpInst::ICMP_ULE; + case ICmpInst::ICMP_ULE: + return ICmpInst::ICMP_ULT; + case ICmpInst::ICMP_SLT: + return ICmpInst::ICMP_SLE; + case ICmpInst::ICMP_SLE: + return ICmpInst::ICMP_SLT; + case ICmpInst::ICMP_UGT: + return ICmpInst::ICMP_UGE; + case ICmpInst::ICMP_UGE: + return ICmpInst::ICMP_UGT; + case ICmpInst::ICMP_SGT: + return ICmpInst::ICMP_SGE; + case ICmpInst::ICMP_SGE: + return ICmpInst::ICMP_SGT; + default: + llvm_unreachable("Unsupported loop latch!"); + } +} + Optional LoopPredication::widenICmpRangeCheckIncrementingLoop( LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck, SCEVExpander &Expander, IRBuilder<> &Builder) { @@ -415,23 +448,7 @@ DEBUG(dbgs() << "Can't expand limit check!\n"); return None; } - ICmpInst::Predicate LimitCheckPred; - switch (LatchCheck.Pred) { - case ICmpInst::ICMP_ULT: - LimitCheckPred = ICmpInst::ICMP_ULE; - break; - case ICmpInst::ICMP_ULE: - LimitCheckPred = ICmpInst::ICMP_ULT; - break; - case ICmpInst::ICMP_SLT: - LimitCheckPred = ICmpInst::ICMP_SLE; - break; - case ICmpInst::ICMP_SLE: - LimitCheckPred = ICmpInst::ICMP_SLT; - break; - default: - llvm_unreachable("Unsupported loop latch!"); - } + auto LimitCheckPred = getLatchPredicateForGuard(LatchCheck.Pred); DEBUG(dbgs() << "LHS: " << *LatchLimit << "\n"); DEBUG(dbgs() << "RHS: " << *RHS << "\n"); @@ -472,9 +489,7 @@ // latchLimit 1. // See the header comment for reasoning of the checks. Instruction *InsertAt = Preheader->getTerminator(); - auto LimitCheckPred = ICmpInst::isSigned(LatchCheck.Pred) - ? ICmpInst::ICMP_SGE - : ICmpInst::ICMP_UGE; + auto LimitCheckPred = getLatchPredicateForGuard(LatchCheck.Pred); auto *FirstIterationCheck = expandCheck(Expander, Builder, ICmpInst::ICMP_ULT, GuardStart, GuardLimit, InsertAt); auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred, LatchLimit, @@ -658,7 +673,8 @@ Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE; } else { assert(Step->isAllOnesValue() && "Step should be -1!"); - return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT; + return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT && + Pred != ICmpInst::ICMP_UGE && Pred != ICmpInst::ICMP_SGE; } }; Index: test/Transforms/LoopPredication/reverse.ll =================================================================== --- test/Transforms/LoopPredication/reverse.ll +++ test/Transforms/LoopPredication/reverse.ll @@ -138,3 +138,108 @@ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } + +define i32 @signed_reverse_loop_n_to_lower_limit_equal(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) { +; CHECK-LABEL: @signed_reverse_loop_n_to_lower_limit_equal( +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +; CHECK: loop.preheader: +; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1 +; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length +; CHECK-NEXT: [[no_wrap_check:%.*]] = icmp sgt i32 %lowerlimit, 1 +; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], [[no_wrap_check]] +loop.preheader: + br label %loop + +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +loop: + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ] + %i.next = add nsw i32 %i, -1 + %within.bounds = icmp ult i32 %i.next, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + %i.i64 = zext i32 %i.next to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %continue = icmp sge i32 %i, %lowerlimit + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @unsigned_reverse_loop_n_to_lower_limit_equal(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) { +; CHECK-LABEL: @unsigned_reverse_loop_n_to_lower_limit_equal( +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +; CHECK: loop.preheader: +; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1 +; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length +; CHECK-NEXT: [[no_wrap_check:%.*]] = icmp ugt i32 %lowerlimit, 1 +; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], [[no_wrap_check]] +loop.preheader: + br label %loop + +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +loop: + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ] + %i.next = add nsw i32 %i, -1 + %within.bounds = icmp ult i32 %i.next, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + %i.i64 = zext i32 %i.next to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %continue = icmp uge i32 %i, %lowerlimit + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + + +; if we predicated the loop, the guard will definitely fail and we will +; deoptimize early on. +define i32 @unsigned_reverse_loop_n_to_1(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) { +; CHECK-LABEL: @unsigned_reverse_loop_n_to_1( +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +; CHECK: loop.preheader: +; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1 +; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length +; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], false +loop.preheader: + br label %loop + +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] +loop: + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ] + %i.next = add nsw i32 %i, -1 + %within.bounds = icmp ult i32 %i.next, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + %i.i64 = zext i32 %i.next to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %continue = icmp uge i32 %i, 1 + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} +