Index: llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp @@ -100,26 +100,25 @@ // implies M. // // For now the transformation is limited to the following case: -// * The loop has a single latch with either ult or slt icmp condition. +// * The loop has a single latch with the condition of the form: +// ++i latchLimit, where is u<, u<=, s<, or s<=. // * The step of the IV used in the latch condition is 1. // * The IV of the latch condition is the same as the post increment IV of the // guard condition. -// * The guard condition is ult. +// * The guard condition is +// i u< guardLimit. // -// In this case the latch is of the from: -// ++i u< latchLimit or ++i s< latchLimit -// and the guard is of the form: -// i u< guardLimit -// -// For the unsigned latch comparison case M is: +// For the ult latch comparison case M is: // forall X . X u< guardLimit && (X + 1) u< latchLimit => // (X + 1) u< guardLimit // // This is true if latchLimit u<= guardLimit since then // (X + 1) u< latchLimit u<= guardLimit == (X + 1) u< guardLimit. // -// So the widened condition is: +// So for ult condition the widened condition is: // i.start u< guardLimit && latchLimit u<= guardLimit +// Similarly for ule condition the widened condition is: +// i.start u< guardLimit && latchLimit u< guardLimit // // For the signed latch comparison case M is: // forall X . X u< guardLimit && (X + 1) s< latchLimit => @@ -147,6 +146,8 @@ // // So the widened condition is: // i.start u< guardLimit && latchLimit s<= guardLimit +// Similarly for sle condition the widened condition is: +// i.start u< guardLimit && latchLimit s< guardLimit // //===----------------------------------------------------------------------===// @@ -303,7 +304,7 @@ DEBUG(ICI->dump()); // parseLoopStructure guarantees that the latch condition is: - // ++i u< latchLimit or ++i s< latchLimit + // ++i latchLimit, where is u<, u<=, s<, or s<=. // We are looking for the range checks of the form: // i u< guardLimit auto RangeCheck = parseLoopICmp(ICI); @@ -327,15 +328,27 @@ assert(RangeCheckIV->getStepRecurrence(*SE)->isOne() && "must be one"); const SCEV *Start = RangeCheckIV->getStart(); - // Generate the widened condition. See the file header comment for reasoning. - // If the latch condition is unsigned: - // i.start u< guardLimit && latchLimit u<= guardLimit - // If the latch condition is signed: - // i.start u< guardLimit && latchLimit s<= guardLimit - - auto LimitCheckPred = ICmpInst::isSigned(LatchCheck.Pred) - ? ICmpInst::ICMP_SLE - : ICmpInst::ICMP_ULE; + // Generate the widened condition: + // i.start u< guardLimit && latchLimit guardLimit + // where depends on the latch condition predicate. See the file + // header comment for the reasoning. + ICmpInst::Predicate LimitCheckPred; + switch (LatchCheck.Pred) { + case ICmpInst::ICMP_ULT: + LimitCheckPred = ICmpInst::ICMP_ULE; + break; + case ICmpInst::ICMP_ULE: + LimitCheckPred = ICmpInst::ICMP_ULT; + break; + case ICmpInst::ICMP_SLT: + LimitCheckPred = ICmpInst::ICMP_SLE; + break; + case ICmpInst::ICMP_SLE: + LimitCheckPred = ICmpInst::ICMP_SLT; + break; + default: + llvm_unreachable("Unsupported loop latch!"); + } auto CanExpand = [this](const SCEV *S) { return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE); @@ -443,7 +456,9 @@ } if (Result->Pred != ICmpInst::ICMP_ULT && - Result->Pred != ICmpInst::ICMP_SLT) { + Result->Pred != ICmpInst::ICMP_SLT && + Result->Pred != ICmpInst::ICMP_ULE && + Result->Pred != ICmpInst::ICMP_SLE) { DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred << ")!\n"); return None; Index: llvm/trunk/test/Transforms/LoopPredication/basic.ll =================================================================== --- llvm/trunk/test/Transforms/LoopPredication/basic.ll +++ llvm/trunk/test/Transforms/LoopPredication/basic.ll @@ -39,6 +39,42 @@ ret i32 %result } +define i32 @unsigned_loop_0_to_n_ule_latch_ult_check(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @unsigned_loop_0_to_n_ule_latch_ult_check +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ult i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ule i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + define i32 @unsigned_loop_0_to_n_ugt_check(i32* %array, i32 %length, i32 %n) { ; CHECK-LABEL: @unsigned_loop_0_to_n_ugt_check entry: @@ -111,6 +147,78 @@ ret i32 %result } +define i32 @signed_loop_0_to_n_inverse_latch_predicate(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @signed_loop_0_to_n_inverse_latch_predicate +entry: + %tmp5 = icmp sle i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp slt i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp sgt i32 %i.next, %n + br i1 %continue, label %exit, label %loop + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @signed_loop_0_to_n_sle_latch_ult_check(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @signed_loop_0_to_n_sle_latch_ult_check +entry: + %tmp5 = icmp sle i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length +; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp slt i32 %n, %length +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]] +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp sle i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + define i32 @unsupported_latch_pred_loop_0_to_n(i32* %array, i32 %length, i32 %n) { ; CHECK-LABEL: @unsupported_latch_pred_loop_0_to_n entry: