Index: lib/Transforms/Scalar/LoopPredication.cpp =================================================================== --- lib/Transforms/Scalar/LoopPredication.cpp +++ lib/Transforms/Scalar/LoopPredication.cpp @@ -230,7 +230,8 @@ cl::init(true)); namespace { -class LoopPredication { + class LoopPredication { + public: /// Represents an induction variable check: /// icmp Pred, <induction variable>, <loop invariant limit> struct LoopICmp { @@ -246,6 +247,7 @@ << ", Limit = " << *Limit << "\n"; } }; + private: AliasAnalysis *AA; ScalarEvolution *SE; @@ -614,6 +616,21 @@ return Builder.CreateAnd(FirstIterationCheck, LimitCheck); } +static void normalizePredicate(ScalarEvolution *SE, + LoopPredication::LoopICmp& RC) { + // LFTR canonicalizes checks to the ICMP_NE form instead of an ULT/SLT form. + // Normalize back to the ULT/SLT form for ease of handling. + // Note: At the moment, this is rather restrictive in practice. It handles + // pre-increment comparison on a canonical IV against a known positive RHS, + // but does not handle even trivial post-increment forms or non-1 steps. + // TODO: Generalize! + if (RC.Pred == ICmpInst::ICMP_NE && + RC.IV->getStepRecurrence(*SE)->isOne() && + SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit)) + RC.Pred = ICmpInst::ICMP_ULT; +} + + /// If ICI can be widened to a loop invariant condition emits the loop /// invariant condition in the loop preheader and return it, otherwise /// returns None. 
@@ -852,6 +869,7 @@ } }; + normalizePredicate(SE, *Result); if (IsUnsupportedPredicate(Step, Result->Pred)) { LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred << ")!\n"); Index: test/Transforms/LoopPredication/basic.ll =================================================================== --- test/Transforms/LoopPredication/basic.ll +++ test/Transforms/LoopPredication/basic.ll @@ -1635,11 +1635,14 @@ ; CHECK-LABEL: @ne_latch_zext_preinc( ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[N:%.*]] = zext i16 [[N16:%.*]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LENGTH:%.*]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[N]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 0, [[LENGTH]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[TMP1]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] -; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[I]], [[N]] ; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] @@ -1711,11 +1714,14 @@ ; CHECK-NEXT: [[TMP5:%.*]] = icmp sle i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]] ; CHECK: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LENGTH:%.*]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[N]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 0, [[LENGTH]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[TMP1]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[I]], [[N]] ; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]