Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -150,6 +150,10 @@ "number of accesses allowed to be present in a loop in order to " "enable memory promotion.")); +static cl::opt ProveViaSCEV( + "licm-use-scalar-evolution", cl::Hidden, cl::init(false), + cl::desc("Allows LICM to use Scalar Evolution analyzis for inferring facts.")); + static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, @@ -177,7 +181,7 @@ static bool hoistArithmetics(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, AssumptionCache *AC, - DominatorTree *DT); + DominatorTree *DT, ScalarEvolution *SE); static Instruction *cloneInstructionInExitBlock( Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU); @@ -974,7 +978,7 @@ // Try to reassociate instructions so that part of computations can be // done out of loop. - if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT)) { + if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT, ProveViaSCEV ? SE : nullptr)) { Changed = true; continue; } @@ -2532,7 +2536,7 @@ static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS, Value *InvariantRHS, ICmpInst &ICmp, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, - AssumptionCache *AC, DominatorTree *DT) { + AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE) { assert(ICmpInst::isSigned(Pred) && "Not supported yet!"); assert(!L.isLoopInvariant(VariantLHS) && "Precondition."); assert(L.isLoopInvariant(InvariantRHS) && "Precondition."); @@ -2555,10 +2559,21 @@ // normal linear arithmetics). Overflows make things much more complicated, so // we want to avoid this. auto &DL = L.getHeader()->getModule()->getDataLayout(); - bool ProvedNoOverflowAfterReassociate = - computeOverflowForSignedSub(InvariantRHS, InvariantOp, DL, AC, &ICmp, - DT) == llvm::OverflowResult::NeverOverflows; - if (!ProvedNoOverflowAfterReassociate) + auto ProvedNoOverflowAfterReassociate = [&]() { + if (computeOverflowForSignedSub(InvariantRHS, InvariantOp, DL, AC, &ICmp, + DT) == llvm::OverflowResult::NeverOverflows) + return true; + if (SE) { + const SCEV *RHSS = SE->getSCEV(InvariantRHS); + const SCEV *InvariantOpS = SE->getSCEV(InvariantOp); + if (SE->willNotOverflow(Instruction::BinaryOps::Sub, /*Signed*/ true, + RHSS, InvariantOpS, &ICmp)) + return true; + } + + return false; + }; + if (!ProvedNoOverflowAfterReassociate()) return false; auto *Preheader = L.getLoopPreheader(); assert(Preheader && "Loop is not in simplify form?"); @@ -2578,7 +2593,7 @@ static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS, Value *InvariantRHS, ICmpInst &ICmp, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, - AssumptionCache *AC, DominatorTree *DT) { + AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE) { assert(ICmpInst::isSigned(Pred) && "Not supported yet!"); assert(!L.isLoopInvariant(VariantLHS) && "Precondition."); assert(L.isLoopInvariant(InvariantRHS) && "Precondition."); @@ -2607,15 +2622,36 @@ // we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that // "C1 - C2" does not overflow. auto &DL = L.getHeader()->getModule()->getDataLayout(); - bool ProvedNoOverflowAfterReassociate = - VariantSubtracted - ? (computeOverflowForSignedSub(InvariantOp, InvariantRHS, DL, AC, - &ICmp, DT) == - llvm::OverflowResult::NeverOverflows) - : (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, DL, AC, - &ICmp, DT) == - llvm::OverflowResult::NeverOverflows); - if (!ProvedNoOverflowAfterReassociate) + auto ProvedNoOverflowAfterReassociate = [&]() { + if (VariantSubtracted) { + if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, DL, AC, &ICmp, + DT) == + llvm::OverflowResult::NeverOverflows) + return true; + } else { + if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, DL, AC, &ICmp, + DT) == + llvm::OverflowResult::NeverOverflows) + return true; + } + + if (SE) { + const SCEV *RHSS = SE->getSCEV(InvariantRHS); + const SCEV *InvariantOpS = SE->getSCEV(InvariantOp); + if (VariantSubtracted) { + if (SE->willNotOverflow(Instruction::BinaryOps::Sub, /*Signed*/ true, + InvariantOpS, RHSS, &ICmp)) + return true; + } else { + if (SE->willNotOverflow(Instruction::BinaryOps::Add, /*Signed*/ true, + InvariantOpS, RHSS, &ICmp)) + return true; + } + } + + return false; + }; + if (!ProvedNoOverflowAfterReassociate()) return false; auto *Preheader = L.getLoopPreheader(); assert(Preheader && "Loop is not in simplify form?"); @@ -2636,7 +2672,7 @@ /// Reassociate and hoist add/sub expressions. static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, AssumptionCache *AC, - DominatorTree *DT) { + DominatorTree *DT, ScalarEvolution *SE) { using namespace PatternMatch; ICmpInst::Predicate Pred; Value *LHS, *RHS; @@ -2659,10 +2695,10 @@ // TODO: We could go with smarter context, taking common dominator of all I's // users instead of I itself. - if (hoistAdd(Pred, LHS, RHS, cast(I), L, SafetyInfo, MSSAU, AC, DT)) + if (hoistAdd(Pred, LHS, RHS, cast(I), L, SafetyInfo, MSSAU, AC, DT, SE)) return true; - if (hoistSub(Pred, LHS, RHS, cast(I), L, SafetyInfo, MSSAU, AC, DT)) + if (hoistSub(Pred, LHS, RHS, cast(I), L, SafetyInfo, MSSAU, AC, DT, SE)) return true; return false; @@ -2671,7 +2707,7 @@ static bool hoistArithmetics(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, AssumptionCache *AC, - DominatorTree *DT) { + DominatorTree *DT, ScalarEvolution *SE) { // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them // into (x < min(INV1, INV2)), and hoisting the invariant part of this // expression out of the loop. @@ -2689,7 +2725,7 @@ } // Try to hoist add/sub's by reassociation. - if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) { + if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT, SE)) { ++NumHoisted; ++NumAddSubHoisted; return true; Index: llvm/test/Transforms/LICM/hoist-add-sub.ll =================================================================== --- llvm/test/Transforms/LICM/hoist-add-sub.ll +++ llvm/test/Transforms/LICM/hoist-add-sub.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -passes=licm -verify-memoryssa < %s | FileCheck %s +; RUN: opt -S -passes=licm -verify-memoryssa -licm-use-scalar-evolution < %s | FileCheck %s --check-prefixes=USE_SCEV ; x - iv < 4 ==> iv > x - 4 define i32 @test_01(ptr %p, ptr %x_p, ptr %length_p) { @@ -26,6 +27,29 @@ ; CHECK: out_of_bounds: ; CHECK-NEXT: ret i32 -1 ; +; USE_SCEV-LABEL: define i32 @test_01 +; USE_SCEV-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; USE_SCEV-NEXT: entry: +; USE_SCEV-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0:![0-9]+]] +; USE_SCEV-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] +; USE_SCEV-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 [[X]], 4 +; USE_SCEV-NEXT: br label [[LOOP:%.*]] +; USE_SCEV: loop: +; USE_SCEV-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; USE_SCEV-NEXT: [[X_CHECK:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_OP]] +; USE_SCEV-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; USE_SCEV: backedge: +; USE_SCEV-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; USE_SCEV-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; USE_SCEV-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; USE_SCEV-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[LENGTH]] +; USE_SCEV-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; USE_SCEV: exit: +; USE_SCEV-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; USE_SCEV-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; USE_SCEV: out_of_bounds: +; USE_SCEV-NEXT: ret i32 -1 +; entry: %x = load i32, ptr %x_p, !range !0 %length = load i32, ptr %length_p, !range !0 @@ -83,6 +107,36 @@ ; CHECK: failed: ; CHECK-NEXT: ret i32 -2 ; +; USE_SCEV-LABEL: define i32 @test_01a +; USE_SCEV-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; USE_SCEV-NEXT: entry: +; USE_SCEV-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4 +; USE_SCEV-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4 +; USE_SCEV-NEXT: [[PRECOND_1:%.*]] = icmp sge i32 [[X]], 0 +; USE_SCEV-NEXT: [[PRECOND_2:%.*]] = icmp sge i32 [[LENGTH]], 0 +; USE_SCEV-NEXT: [[PRECOND:%.*]] = and i1 [[PRECOND_1]], [[PRECOND_2]] +; USE_SCEV-NEXT: br i1 [[PRECOND]], label [[LOOP_PREHEADER:%.*]], label [[FAILED:%.*]] +; USE_SCEV: loop.preheader: +; USE_SCEV-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 [[X]], 4 +; USE_SCEV-NEXT: br label [[LOOP:%.*]] +; USE_SCEV: loop: +; USE_SCEV-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; USE_SCEV-NEXT: [[X_CHECK:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_OP]] +; USE_SCEV-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; USE_SCEV: backedge: +; USE_SCEV-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; USE_SCEV-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; USE_SCEV-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; USE_SCEV-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[LENGTH]] +; USE_SCEV-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; USE_SCEV: exit: +; USE_SCEV-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; USE_SCEV-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; USE_SCEV: out_of_bounds: +; USE_SCEV-NEXT: ret i32 -1 +; USE_SCEV: failed: +; USE_SCEV-NEXT: ret i32 -2 +; entry: %x = load i32, ptr %x_p %length = load i32, ptr %length_p @@ -139,6 +193,29 @@ ; CHECK: out_of_bounds: ; CHECK-NEXT: ret i32 -1 ; +; USE_SCEV-LABEL: define i32 @test_01_neg +; USE_SCEV-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; USE_SCEV-NEXT: entry: +; USE_SCEV-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4 +; USE_SCEV-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] +; USE_SCEV-NEXT: br label [[LOOP:%.*]] +; USE_SCEV: loop: +; USE_SCEV-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; USE_SCEV-NEXT: [[ARITH:%.*]] = sub nsw i32 [[X]], [[IV]] +; USE_SCEV-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4 +; USE_SCEV-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; USE_SCEV: backedge: +; USE_SCEV-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; USE_SCEV-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; USE_SCEV-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; USE_SCEV-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[LENGTH]] +; USE_SCEV-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; USE_SCEV: exit: +; USE_SCEV-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; USE_SCEV-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; USE_SCEV: out_of_bounds: +; USE_SCEV-NEXT: ret i32 -1 +; entry: %x = load i32, ptr %x_p %length = load i32, ptr %length_p, !range !0 @@ -190,6 +267,29 @@ ; CHECK: out_of_bounds: ; CHECK-NEXT: ret i32 -1 ; +; USE_SCEV-LABEL: define i32 @test_02 +; USE_SCEV-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; USE_SCEV-NEXT: entry: +; USE_SCEV-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0]] +; USE_SCEV-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] +; USE_SCEV-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 4, [[X]] +; USE_SCEV-NEXT: br label [[LOOP:%.*]] +; USE_SCEV: loop: +; USE_SCEV-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; USE_SCEV-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]] +; USE_SCEV-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; USE_SCEV: backedge: +; USE_SCEV-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; USE_SCEV-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; USE_SCEV-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; USE_SCEV-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[LENGTH]] +; USE_SCEV-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; USE_SCEV: exit: +; USE_SCEV-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; USE_SCEV-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; USE_SCEV: out_of_bounds: +; USE_SCEV-NEXT: ret i32 -1 +; entry: %x = load i32, ptr %x_p, !range !0 %length = load i32, ptr %length_p, !range !0 @@ -247,6 +347,36 @@ ; CHECK: failed: ; CHECK-NEXT: ret i32 -2 ; +; USE_SCEV-LABEL: define i32 @test_02a +; USE_SCEV-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; USE_SCEV-NEXT: entry: +; USE_SCEV-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4 +; USE_SCEV-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4 +; USE_SCEV-NEXT: [[PRECOND_1:%.*]] = icmp sge i32 [[X]], 0 +; USE_SCEV-NEXT: [[PRECOND_2:%.*]] = icmp sge i32 [[LENGTH]], 0 +; USE_SCEV-NEXT: [[PRECOND:%.*]] = and i1 [[PRECOND_1]], [[PRECOND_2]] +; USE_SCEV-NEXT: br i1 [[PRECOND]], label [[LOOP_PREHEADER:%.*]], label [[FAILED:%.*]] +; USE_SCEV: loop.preheader: +; USE_SCEV-NEXT: br label [[LOOP:%.*]] +; USE_SCEV: loop: +; USE_SCEV-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; USE_SCEV-NEXT: [[ARITH:%.*]] = add nsw i32 [[X]], [[IV]] +; USE_SCEV-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4 +; USE_SCEV-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; USE_SCEV: backedge: +; USE_SCEV-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; USE_SCEV-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; USE_SCEV-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; USE_SCEV-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[LENGTH]] +; USE_SCEV-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; USE_SCEV: exit: +; USE_SCEV-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; USE_SCEV-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; USE_SCEV: out_of_bounds: +; USE_SCEV-NEXT: ret i32 -1 +; USE_SCEV: failed: +; USE_SCEV-NEXT: ret i32 -2 +; entry: %x = load i32, ptr %x_p %length = load i32, ptr %length_p @@ -303,6 +433,29 @@ ; CHECK: out_of_bounds: ; CHECK-NEXT: ret i32 -1 ; +; USE_SCEV-LABEL: define i32 @test_03 +; USE_SCEV-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; USE_SCEV-NEXT: entry: +; USE_SCEV-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG1:![0-9]+]] +; USE_SCEV-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] +; USE_SCEV-NEXT: [[INVARIANT_OP:%.*]] = add nsw i32 [[X]], 4 +; USE_SCEV-NEXT: br label [[LOOP:%.*]] +; USE_SCEV: loop: +; USE_SCEV-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; USE_SCEV-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]] +; USE_SCEV-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; USE_SCEV: backedge: +; USE_SCEV-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; USE_SCEV-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; USE_SCEV-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; USE_SCEV-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[LENGTH]] +; USE_SCEV-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; USE_SCEV: exit: +; USE_SCEV-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; USE_SCEV-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; USE_SCEV: out_of_bounds: +; USE_SCEV-NEXT: ret i32 -1 +; entry: %x = load i32, ptr %x_p, !range !1 %length = load i32, ptr %length_p, !range !0 @@ -360,6 +513,36 @@ ; CHECK: failed: ; CHECK-NEXT: ret i32 -2 ; +; USE_SCEV-LABEL: define i32 @test_03a +; USE_SCEV-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; USE_SCEV-NEXT: entry: +; USE_SCEV-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4 +; USE_SCEV-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4 +; USE_SCEV-NEXT: [[PRECOND_1:%.*]] = icmp ult i32 [[X]], 2147483640 +; USE_SCEV-NEXT: [[PRECOND_2:%.*]] = icmp sge i32 [[LENGTH]], 0 +; USE_SCEV-NEXT: [[PRECOND:%.*]] = and i1 [[PRECOND_1]], [[PRECOND_2]] +; USE_SCEV-NEXT: br i1 [[PRECOND]], label [[LOOP_PREHEADER:%.*]], label [[FAILED:%.*]] +; USE_SCEV: loop.preheader: +; USE_SCEV-NEXT: [[INVARIANT_OP:%.*]] = add nsw i32 [[X]], 4 +; USE_SCEV-NEXT: br label [[LOOP:%.*]] +; USE_SCEV: loop: +; USE_SCEV-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; USE_SCEV-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]] +; USE_SCEV-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; USE_SCEV: backedge: +; USE_SCEV-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; USE_SCEV-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; USE_SCEV-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; USE_SCEV-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[LENGTH]] +; USE_SCEV-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; USE_SCEV: exit: +; USE_SCEV-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; USE_SCEV-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; USE_SCEV: out_of_bounds: +; USE_SCEV-NEXT: ret i32 -1 +; USE_SCEV: failed: +; USE_SCEV-NEXT: ret i32 -2 +; entry: %x = load i32, ptr %x_p %length = load i32, ptr %length_p @@ -416,6 +599,29 @@ ; CHECK: out_of_bounds: ; CHECK-NEXT: ret i32 -1 ; +; USE_SCEV-LABEL: define i32 @test_04 +; USE_SCEV-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; USE_SCEV-NEXT: entry: +; USE_SCEV-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0]] +; USE_SCEV-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] +; USE_SCEV-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 4, [[X]] +; USE_SCEV-NEXT: br label [[LOOP:%.*]] +; USE_SCEV: loop: +; USE_SCEV-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; USE_SCEV-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]] +; USE_SCEV-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; USE_SCEV: backedge: +; USE_SCEV-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; USE_SCEV-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; USE_SCEV-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; USE_SCEV-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[LENGTH]] +; USE_SCEV-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; USE_SCEV: exit: +; USE_SCEV-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; USE_SCEV-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; USE_SCEV: out_of_bounds: +; USE_SCEV-NEXT: ret i32 -1 +; entry: %x = load i32, ptr %x_p, !range !0 %length = load i32, ptr %length_p, !range !0 @@ -473,6 +679,36 @@ ; CHECK: failed: ; CHECK-NEXT: ret i32 -2 ; +; USE_SCEV-LABEL: define i32 @test_04a +; USE_SCEV-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; USE_SCEV-NEXT: entry: +; USE_SCEV-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4 +; USE_SCEV-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4 +; USE_SCEV-NEXT: [[PRECOND_1:%.*]] = icmp sge i32 [[X]], 0 +; USE_SCEV-NEXT: [[PRECOND_2:%.*]] = icmp sge i32 [[LENGTH]], 0 +; USE_SCEV-NEXT: [[PRECOND:%.*]] = and i1 [[PRECOND_1]], [[PRECOND_2]] +; USE_SCEV-NEXT: br i1 [[PRECOND]], label [[LOOP_PREHEADER:%.*]], label [[FAILED:%.*]] +; USE_SCEV: loop.preheader: +; USE_SCEV-NEXT: br label [[LOOP:%.*]] +; USE_SCEV: loop: +; USE_SCEV-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; USE_SCEV-NEXT: [[ARITH:%.*]] = add nsw i32 [[IV]], [[X]] +; USE_SCEV-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4 +; USE_SCEV-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; USE_SCEV: backedge: +; USE_SCEV-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; USE_SCEV-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; USE_SCEV-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; USE_SCEV-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[LENGTH]] +; USE_SCEV-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; USE_SCEV: exit: +; USE_SCEV-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; USE_SCEV-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; USE_SCEV: out_of_bounds: +; USE_SCEV-NEXT: ret i32 -1 +; USE_SCEV: failed: +; USE_SCEV-NEXT: ret i32 -2 +; entry: %x = load i32, ptr %x_p %length = load i32, ptr %length_p