Index: llvm/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- llvm/lib/Analysis/ScalarEvolution.cpp
+++ llvm/lib/Analysis/ScalarEvolution.cpp
@@ -10463,6 +10463,36 @@
                                 Context))
         return true;
 
+  // If FoundPred is EQ and FoundLHS is a non-decreasing AddRec that cannot
+  // wrap in the signed sense, "AddRec == FoundRHS" implies that FoundRHS is
+  // at least AddRec's start value, i.e. "start <=s FoundRHS" is also known.
+  if (FoundPred == ICmpInst::ICMP_EQ) {
+    bool FoundCandidate = false;
+    if (Pred == ICmpInst::ICMP_SGT && isa<SCEVAddRecExpr>(FoundRHS)) {
+      std::swap(FoundLHS, FoundRHS);
+      std::swap(LHS, RHS);
+      Pred = ICmpInst::getSwappedPredicate(Pred);
+      FoundCandidate = true;
+    } else if (Pred == ICmpInst::ICMP_SLT && isa<SCEVAddRecExpr>(FoundLHS))
+      FoundCandidate = true;
+
+    if (FoundCandidate) {
+      auto *AddRec = cast<SCEVAddRecExpr>(FoundLHS);
+      auto *StepCst = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*this));
+      // Only NSW justifies a signed bound; NUW alone still allows the AddRec
+      // to step past the signed maximum and compare below its start value.
+      if (StepCst && AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
+        if (!StepCst->getValue()->isNegative()) {
+          auto *NewFoundLHS = AddRec->getStart();
+          auto NewFoundPred = ICmpInst::ICMP_SLE;
+          if (isImpliedCondBalancedTypes(Pred, LHS, RHS, NewFoundPred,
+                                         NewFoundLHS, FoundRHS, Context))
+            return true;
+        }
+      }
+    }
+  }
+
   // Otherwise assume the worst.
   return false;
 }
Index: llvm/test/Transforms/IRCE/sibling_loops.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/IRCE/sibling_loops.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -irce -irce-skip-profitability-checks -irce-print-changed-loops -S < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; CHECK: irce: in function test: constrained Loop at depth 1 containing: %12,%15,%16
+
+define dso_local void @test() local_unnamed_addr #0 {
+  %1 = zext i32 undef to i64
+  br label %2
+
+2:                                                ; preds = %4, %0
+  %3 = phi i64 [ %5, %4 ], [ 1, %0 ]
+  br label %4
+
+4:                                                ; preds = %2
+  %5 = add nuw nsw i64 %3, 1
+  %6 = icmp eq i64 %5, %1
+  br i1 %6, label %7, label %2
+
+7:                                                ; preds = %4
+  br label %8
+
+8:                                                ; preds = %12, %7
+  %9 = phi i64 [ %13, %12 ], [ 1, %7 ]
+  %10 = icmp slt i64 %9, undef
+  br i1 %10, label %11, label %12
+
+11:                                               ; preds = %8
+  br label %12
+
+12:                                               ; preds = %11, %8
+  %13 = add nuw nsw i64 %9, 1
+  %14 = icmp eq i64 %13, %1
+  br i1 %14, label %15, label %8
+
+15:                                               ; preds = %12
+  ret void
+}
+
+attributes #0 = { "target-features"="+neon" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git b601c6719226fb83c43dae62a581e5ee08bfb169)"}
Index: llvm/test/Transforms/IndVarSimplify/lftr-pr20680.ll
===================================================================
--- llvm/test/Transforms/IndVarSimplify/lftr-pr20680.ll
+++ llvm/test/Transforms/IndVarSimplify/lftr-pr20680.ll
@@ -49,10 +49,7 @@
 ; CHECK-NEXT:    store i32 1, i32* @b, align 4
 ; CHECK-NEXT:    br label [[FOR_COND2_LOOPEXIT_US_US]]
 ; CHECK:       for.inc.us.us:
-; CHECK-NEXT:    [[TMP5:%.*]] = phi i32 [ [[TMP4]], [[FOR_INC_LR_PH_US_US]] ], [ [[INC_US_US:%.*]], [[FOR_INC_US_US]] ]
-; CHECK-NEXT:    [[INC_US_US]] = add nsw i32 [[TMP5]], 1
-; CHECK-NEXT:    [[EXITCOND3:%.*]] = icmp ne i32 [[INC_US_US]], 1
-; CHECK-NEXT:    br i1 [[EXITCOND3]], label [[FOR_INC_US_US]], label [[FOR_COND8_FOR_COND2_LOOPEXIT_CRIT_EDGE_US_US:%.*]]
+; CHECK-NEXT:    br i1 true, label [[FOR_INC_US_US]], label [[FOR_COND8_FOR_COND2_LOOPEXIT_CRIT_EDGE_US_US:%.*]]
 ; CHECK:       for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us:
 ; CHECK-NEXT:    br label [[FOR_COND2_FOR_INC13_CRIT_EDGE_US_LCSSA_US:%.*]]
 ; CHECK:       for.body3.lr.ph.split.us.split:
@@ -62,14 +59,11 @@
 ; CHECK:       cond.false.us:
 ; CHECK-NEXT:    br label [[COND_END_US]]
 ; CHECK:       cond.end.us:
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* @b, align 4
-; CHECK-NEXT:    [[CMP91_US:%.*]] = icmp slt i32 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* @b, align 4
+; CHECK-NEXT:    [[CMP91_US:%.*]] = icmp slt i32 [[TMP5]], 1
 ; CHECK-NEXT:    br i1 [[CMP91_US]], label [[FOR_INC_LR_PH_US:%.*]], label [[FOR_COND2_LOOPEXIT_US:%.*]]
 ; CHECK:       for.inc.us:
-; CHECK-NEXT:    [[TMP7:%.*]] = phi i32 [ [[TMP6]], [[FOR_INC_LR_PH_US]] ], [ [[INC_US:%.*]], [[FOR_INC_US:%.*]] ]
-; CHECK-NEXT:    [[INC_US]] = add nsw i32 [[TMP7]], 1
-; CHECK-NEXT:    [[EXITCOND2:%.*]] = icmp ne i32 [[INC_US]], 1
-; CHECK-NEXT:    br i1 [[EXITCOND2]], label [[FOR_INC_US]], label [[FOR_COND8_FOR_COND2_LOOPEXIT_CRIT_EDGE_US:%.*]]
+; CHECK-NEXT:    br i1 true, label [[FOR_INC_US:%.*]], label [[FOR_COND8_FOR_COND2_LOOPEXIT_CRIT_EDGE_US:%.*]]
 ; CHECK:       for.cond2.loopexit.us:
 ; CHECK-NEXT:    br i1 false, label [[FOR_COND2_FOR_INC13_CRIT_EDGE_US_LCSSA_US_US_LCSSA:%.*]], label [[FOR_BODY3_US]]
 ; CHECK:       for.inc.lr.ph.us:
@@ -93,12 +87,12 @@
 ; CHECK:       cond.false.us4:
 ; CHECK-NEXT:    br label [[COND_END_US5]]
 ; CHECK:       cond.end.us5:
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* @b, align 4
-; CHECK-NEXT:    [[CMP91_US7:%.*]] = icmp slt i32 [[TMP8]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* @b, align 4
+; CHECK-NEXT:    [[CMP91_US7:%.*]] = icmp slt i32 [[TMP6]], 1
 ; CHECK-NEXT:    br i1 [[CMP91_US7]], label [[FOR_INC_LR_PH_US12:%.*]], label [[FOR_COND2_LOOPEXIT_US11:%.*]]
 ; CHECK:       for.inc.us8:
-; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[FOR_INC_LR_PH_US12]] ], [ [[INC_US9:%.*]], [[FOR_INC_US8:%.*]] ]
-; CHECK-NEXT:    [[INC_US9]] = add nsw i32 [[TMP9]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = phi i32 [ [[TMP6]], [[FOR_INC_LR_PH_US12]] ], [ [[INC_US9:%.*]], [[FOR_INC_US8:%.*]] ]
+; CHECK-NEXT:    [[INC_US9]] = add nsw i32 [[TMP7]], 1
 ; CHECK-NEXT:    [[EXITCOND1:%.*]] = icmp ne i32 [[INC_US9]], 1
 ; CHECK-NEXT:    br i1 [[EXITCOND1]], label [[FOR_INC_US8]], label [[FOR_COND8_FOR_COND2_LOOPEXIT_CRIT_EDGE_US13:%.*]]
 ; CHECK:       for.cond2.loopexit.us11:
@@ -122,14 +116,14 @@
 ; CHECK:       cond.false:
 ; CHECK-NEXT:    br label [[COND_END]]
 ; CHECK:       cond.end:
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* @b, align 4
-; CHECK-NEXT:    [[CMP91:%.*]] = icmp slt i32 [[TMP10]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* @b, align 4
+; CHECK-NEXT:    [[CMP91:%.*]] = icmp slt i32 [[TMP8]], 1
 ; CHECK-NEXT:    br i1 [[CMP91]], label [[FOR_INC_LR_PH:%.*]], label [[FOR_COND2_LOOPEXIT]]
 ; CHECK:       for.inc.lr.ph:
 ; CHECK-NEXT:    br label [[FOR_INC:%.*]]
 ; CHECK:       for.inc:
-; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ [[TMP10]], [[FOR_INC_LR_PH]] ], [ [[INC:%.*]], [[FOR_INC]] ]
-; CHECK-NEXT:    [[INC]] = add nsw i32 [[TMP11]], 1
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[FOR_INC_LR_PH]] ], [ [[INC:%.*]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[TMP9]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_INC]], label [[FOR_COND8_FOR_COND2_LOOPEXIT_CRIT_EDGE:%.*]]
 ; CHECK:       for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa:
@@ -143,8 +137,8 @@
 ; CHECK-NEXT:    br label [[FOR_INC13]]
 ; CHECK:       for.inc13:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i32 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND4:%.*]] = icmp ne i32 [[INDVARS_IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EXITCOND4]], label [[FOR_COND2_PREHEADER]], label [[FOR_END15:%.*]]
+; CHECK-NEXT:    [[EXITCOND2:%.*]] = icmp ne i32 [[INDVARS_IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[EXITCOND2]], label [[FOR_COND2_PREHEADER]], label [[FOR_END15:%.*]]
 ; CHECK:       for.end15:
 ; CHECK-NEXT:    ret void
 ;