diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp --- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -119,6 +119,16 @@ cl::desc("If set to true, IRCE may eliminate wide range checks in loops " "with narrow latch condition.")); +static cl::opt MaxTypeSizeForOverflowCheck( + "irce-max-type-size-for-overflow-check", cl::Hidden, cl::init(32), + cl::desc( + "Maximum size of range check type for which can be produced runtime " + "overflow check of its limit's computation")); + +static cl::opt + PrintScaledBoundaryRangeChecks("irce-print-scaled-boundary-range-checks", + cl::Hidden, cl::init(false)); + static const char *ClonedLoopTag = "irce.loop.clone"; #define DEBUG_TYPE "irce" @@ -156,6 +166,10 @@ const SCEVAddRecExpr *&Index, const SCEV *&End); + static bool reassociateSubLHS(Loop *L, Value *VariantLHS, Value *InvariantRHS, + ICmpInst::Predicate Pred, ScalarEvolution &SE, + const SCEVAddRecExpr *&Index, const SCEV *&End); + public: const SCEV *getBegin() const { return Begin; } const SCEV *getStep() const { return Step; } @@ -281,6 +295,10 @@ if (parseIvAgaisntLimit(L, LHS, RHS, Pred, SE, Index, End)) return true; + if (reassociateSubLHS(L, LHS, RHS, Pred, SE, Index, End)) + return true; + + // TODO: support ReassociateAddLHS return false; } @@ -346,6 +364,126 @@ llvm_unreachable("default clause returns!"); } +// Try to parse range check in the form of "IV - Offset vs Limit" or "Offset - +// IV vs Limit" +bool InductiveRangeCheck::reassociateSubLHS( + Loop *L, Value *VariantLHS, Value *InvariantRHS, ICmpInst::Predicate Pred, + ScalarEvolution &SE, const SCEVAddRecExpr *&Index, const SCEV *&End) { + Value *LHS, *RHS; + if (!match(VariantLHS, m_Sub(m_Value(LHS), m_Value(RHS)))) + return false; + + const SCEV *IV = SE.getSCEV(LHS); + const SCEV *Offset = SE.getSCEV(RHS); + const SCEV *Limit = SE.getSCEV(InvariantRHS); + + bool OffsetSubtracted = false; + if (SE.isLoopInvariant(IV, L)) + // "Offset - IV vs Limit" + std::swap(IV, Offset); + else if (SE.isLoopInvariant(Offset, L)) + // "IV - Offset vs Limit" + OffsetSubtracted = true; + else + return false; + + const auto *AddRec = dyn_cast(IV); + if (!AddRec) + return false; + + // In order to turn "IV - Offset < Limit" into "IV < Limit + Offset", we need + // to be able to freely move values from left side of inequality to right side + // (just as in normal linear arithmetics). Overflows make things much more + // complicated, so we want to avoid this. + // + // Let's prove that the initial subtraction doesn't overflow with all IV's + // values from the safe range constructed for that check. + // + // [Case 1] IV - Offset < Limit + // It doesn't overflow if: + // SINT_MIN <= IV - Offset <= SINT_MAX + // In terms of scaled SINT we need to prove: + // SINT_MIN + Offset <= IV <= SINT_MAX + Offset + // Safe range will be constructed: + // 0 <= IV < Limit + Offset + // It means that 'IV - Offset' doesn't underflow, because: + // SINT_MIN + Offset < 0 <= IV + // and doesn't overflow: + // IV < Limit + Offset <= SINT_MAX + Offset + // + // [Case 2] Offset - IV > Limit + // It doesn't overflow if: + // SINT_MIN <= Offset - IV <= SINT_MAX + // In terms of scaled SINT we need to prove: + // -SINT_MIN >= IV - Offset >= -SINT_MAX + // Offset - SINT_MIN >= IV >= Offset - SINT_MAX + // Safe range will be constructed: + // 0 <= IV < Offset - Limit + // It means that 'Offset - IV' doesn't underflow, because + // Offset - SINT_MAX < 0 <= IV + // and doesn't overflow: + // IV < Offset - Limit <= Offset - SINT_MIN + // + // For the computed upper boundary of the IV's range (Offset +/- Limit) we + // don't know exactly whether it overflows or not. So if we can't prove this + // fact at compile time, we scale boundary computations to a wider type with + // the intention to add runtime overflow check. + + auto getExprScaledIfOverflow = [&](Instruction::BinaryOps BinOp, + const SCEV *LHS, + const SCEV *RHS) -> const SCEV * { + const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *, + SCEV::NoWrapFlags, unsigned); + switch (BinOp) { + default: + llvm_unreachable("Unsupported binary op"); + case Instruction::Add: + Operation = &ScalarEvolution::getAddExpr; + break; + case Instruction::Sub: + Operation = &ScalarEvolution::getMinusSCEV; + break; + } + + if (SE.willNotOverflow(BinOp, ICmpInst::isSigned(Pred), LHS, RHS, + cast(VariantLHS))) + return (SE.*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0); + + // We couldn't prove that the expression does not overflow. + // Than scale it to a wider type to check overflow at runtime. + auto *Ty = cast(LHS->getType()); + if (Ty->getBitWidth() > MaxTypeSizeForOverflowCheck) + return nullptr; + + auto WideTy = IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2); + return (SE.*Operation)(SE.getSignExtendExpr(LHS, WideTy), + SE.getSignExtendExpr(RHS, WideTy), SCEV::FlagAnyWrap, + 0); + }; + + if (OffsetSubtracted) + // "IV - Offset < Limit" -> "IV" < Offset + Limit + Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Add, Offset, Limit); + else { + // "Offset - IV > Limit" -> "IV" < Offset - Limit + Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Sub, Offset, Limit); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { + // "Expr <= Limit" -> "Expr < Limit + 1" + if (Pred == ICmpInst::ICMP_SLE && Limit) + Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Add, Limit, + SE.getOne(Limit->getType())); + if (Limit) { + Index = AddRec; + End = Limit; + return true; + } + } + return false; +} + void InductiveRangeCheck::extractRangeChecksFromCond( Loop *L, ScalarEvolution &SE, Use &ConditionUse, SmallVectorImpl &Checks, @@ -1593,11 +1731,34 @@ // if latch check is more narrow. auto *IVType = dyn_cast(IndVar->getType()); auto *RCType = dyn_cast(getBegin()->getType()); + auto *EndType = dyn_cast(getEnd()->getType()); // Do not work with pointer types. if (!IVType || !RCType) return std::nullopt; if (IVType->getBitWidth() > RCType->getBitWidth()) return std::nullopt; + + auto PrintRangeCheck = [&](raw_ostream &OS) { + auto L = IndVar->getLoop(); + OS << "irce: in function "; + OS << L->getHeader()->getParent()->getName(); + OS << ", in "; + L->print(OS); + OS << "there is range check with scaled boundary:\n"; + print(OS); + }; + + if (EndType->getBitWidth() > RCType->getBitWidth()) { + assert(EndType->getBitWidth() == RCType->getBitWidth() * 2); + if (PrintScaledBoundaryRangeChecks) + PrintRangeCheck(errs()); + // End is computed with extended type but will be truncated to a narrow one + // type of range check. Therefore we need a check that the result will not + // overflow in terms of narrow type. + // TODO: Support runtime overflow check for End + return std::nullopt; + } + // IndVar is of the form "A + B * I" (where "I" is the canonical induction // variable, that may or may not exist as a real llvm::Value in the loop) and // this inductive range check is a range check on the "C + D * I" ("C" is diff --git a/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll b/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll --- a/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll +++ b/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll @@ -1,5 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -verify-loop-info -passes=irce -S < %s 2>&1 | FileCheck %s +; RUN: opt -verify-loop-info -passes=irce -irce-print-scaled-boundary-range-checks -S < %s 2>&1 | FileCheck %s + + +; CHECK: irce: in function test1, in Loop at depth 1 containing: %loop
,%inbounds +; CHECK-NEXT: there is range check with scaled boundary: +; CHECK-NEXT: InductiveRangeCheck: +; CHECK-NEXT: Begin: 0 Step: 1 End: (-1 + (sext i8 %n to i16)) +; CHECK-NEXT: CheckUse: br i1 %check, label %inbounds, label %out_of_bounds Operand: 0 +; +; CHECK-NEXT: irce: in function test4, in Loop at depth 1 containing: %loop
,%inbounds +; CHECK-NEXT: there is range check with scaled boundary: +; CHECK-NEXT: InductiveRangeCheck: +; CHECK-NEXT: Begin: 0 Step: 1 End: (-2 + (sext i8 %n to i16)) +; CHECK-NEXT: CheckUse: br i1 %check, label %inbounds, label %out_of_bounds Operand: 0 +; +; CHECK-NEXT: irce: in function test_overflow_check_runtime, in Loop at depth 1 containing: %loop
,%inbounds +; CHECK-NEXT: there is range check with scaled boundary: +; CHECK-NEXT: InductiveRangeCheck: +; CHECK-NEXT: Begin: 0 Step: 1 End: (3 + (zext i8 %n to i16)) +; CHECK-NEXT: CheckUse: br i1 %check, label %inbounds, label %out_of_bounds Operand: 0 ; IV = 0; IV = 2) @@ -66,25 +85,64 @@ ; CHECK-NEXT: [[PRECHECK:%.*]] = icmp sgt i8 [[LIMIT]], 0 ; CHECK-NEXT: br i1 [[PRECHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i8 [[N]], -1 +; CHECK-NEXT: [[SMIN:%.*]] = call i8 @llvm.smin.i8(i8 [[TMP0]], i8 0) +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i8 [[SMIN]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = mul i8 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[SMIN2:%.*]] = call i8 @llvm.smin.i8(i8 [[LIMIT]], i8 [[TMP2]]) +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN2]], i8 0) +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i8 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader4: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER4]] ] ; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[N]], [[IDX]] ; CHECK-NEXT: [[CHECK:%.*]] = icmp sge i8 [[SUB]], 2 -; CHECK-NEXT: br i1 [[CHECK]], label [[INBOUNDS]], label [[OUT_OF_BOUNDS:%.*]] +; CHECK-NEXT: br i1 true, label [[INBOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]] ; CHECK: inbounds: ; CHECK-NEXT: [[IDX_NEXT]] = add nuw i8 [[IDX]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[IDX_NEXT]], [[LIMIT]] -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i8 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i8 [ [[IDX_NEXT]], [[INBOUNDS]] ] +; CHECK-NEXT: [[IDX_LCSSA3:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i8 [[IDX_NEXT_LCSSA]], [[LIMIT]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: exit.loopexit.loopexit: +; CHECK-NEXT: [[IDX_LCSSA1_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP:%.*]] ] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] ; CHECK: exit.loopexit: -; CHECK-NEXT: [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ] +; CHECK-NEXT: [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX_LCSSA3]], [[MAIN_EXIT_SELECTOR]] ], [ [[IDX_LCSSA1_PH]], [[EXIT_LOOPEXIT_LOOPEXIT:%.*]] ] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[RES:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IDX_LCSSA1]], [[EXIT_LOOPEXIT]] ] ; CHECK-NEXT: ret i8 [[RES]] +; CHECK: out_of_bounds.loopexit: +; CHECK-NEXT: [[IDX_LCSSA_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP]], [[LOOP_POSTLOOP:%.*]] ] +; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +; CHECK: out_of_bounds.loopexit5: +; CHECK-NEXT: [[IDX_LCSSA_PH6:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ] +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] ; CHECK: out_of_bounds: -; CHECK-NEXT: [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ] +; CHECK-NEXT: [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX_LCSSA_PH]], [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] ], [ [[IDX_LCSSA_PH6]], [[OUT_OF_BOUNDS_LOOPEXIT5]] ] ; CHECK-NEXT: ret i8 [[IDX_LCSSA]] +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_POSTLOOP]] +; CHECK: loop.postloop: +; CHECK-NEXT: [[IDX_POSTLOOP]] = phi i8 [ [[IDX_NEXT_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP]] ], [ [[IDX_COPY]], [[POSTLOOP]] ] +; CHECK-NEXT: [[SUB_POSTLOOP:%.*]] = sub i8 [[N]], [[IDX_POSTLOOP]] +; CHECK-NEXT: [[CHECK_POSTLOOP:%.*]] = icmp sge i8 [[SUB_POSTLOOP]], 2 +; CHECK-NEXT: br i1 [[CHECK_POSTLOOP]], label [[INBOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT]] +; CHECK: inbounds.postloop: +; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1 +; CHECK-NEXT: [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]] +; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP1:![0-9]+]], !irce.loop.clone [[META6:![0-9]+]] ; entry: %n = load i8, ptr %p, !range !0 @@ -338,7 +396,7 @@ ; CHECK: inbounds.postloop: ; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1 ; CHECK-NEXT: [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]] -; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP1:![0-9]+]], !irce.loop.clone !6 +; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]], !irce.loop.clone [[META6]] ; entry: %n = load i8, ptr %p, !range !0 @@ -429,25 +487,69 @@ ; CHECK-NEXT: [[PRECHECK:%.*]] = icmp sgt i8 [[LIMIT]], 0 ; CHECK-NEXT: br i1 [[PRECHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[N]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i8 [[N]], 127 +; CHECK-NEXT: [[SMAX:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP1]], i8 0) +; CHECK-NEXT: [[TMP2:%.*]] = sub i8 [[TMP0]], [[SMAX]] +; CHECK-NEXT: [[TMP3:%.*]] = add nsw i8 [[N]], -2 +; CHECK-NEXT: [[SMIN:%.*]] = call i8 @llvm.smin.i8(i8 [[TMP3]], i8 0) +; CHECK-NEXT: [[SMAX2:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN]], i8 -1) +; CHECK-NEXT: [[TMP4:%.*]] = add nsw i8 [[SMAX2]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = mul i8 [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[SMIN3:%.*]] = call i8 @llvm.smin.i8(i8 [[LIMIT]], i8 [[TMP5]]) +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN3]], i8 0) +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i8 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP6]], label [[LOOP_PREHEADER6:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader6: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER6]] ] ; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[N]], [[IDX]] ; CHECK-NEXT: [[CHECK:%.*]] = icmp sgt i8 [[SUB]], 2 -; CHECK-NEXT: br i1 [[CHECK]], label [[INBOUNDS]], label [[OUT_OF_BOUNDS:%.*]] +; CHECK-NEXT: br i1 true, label [[INBOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT7:%.*]] ; CHECK: inbounds: ; CHECK-NEXT: [[IDX_NEXT]] = add nuw i8 [[IDX]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[IDX_NEXT]], [[LIMIT]] -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i8 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i8 [ [[IDX_NEXT]], [[INBOUNDS]] ] +; CHECK-NEXT: [[IDX_LCSSA5:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ] +; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[IDX_NEXT_LCSSA]], [[LIMIT]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: exit.loopexit.loopexit: +; CHECK-NEXT: [[IDX_LCSSA1_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP:%.*]] ] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] ; CHECK: exit.loopexit: -; CHECK-NEXT: [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ] +; CHECK-NEXT: [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX_LCSSA5]], [[MAIN_EXIT_SELECTOR]] ], [ [[IDX_LCSSA1_PH]], [[EXIT_LOOPEXIT_LOOPEXIT:%.*]] ] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[RES:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IDX_LCSSA1]], [[EXIT_LOOPEXIT]] ] ; CHECK-NEXT: ret i8 [[RES]] +; CHECK: out_of_bounds.loopexit: +; CHECK-NEXT: [[IDX_LCSSA_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP]], [[LOOP_POSTLOOP:%.*]] ] +; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +; CHECK: out_of_bounds.loopexit7: +; CHECK-NEXT: [[IDX_LCSSA_PH8:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ] +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] ; CHECK: out_of_bounds: -; CHECK-NEXT: [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ] +; CHECK-NEXT: [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX_LCSSA_PH]], [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] ], [ [[IDX_LCSSA_PH8]], [[OUT_OF_BOUNDS_LOOPEXIT7]] ] ; CHECK-NEXT: ret i8 [[IDX_LCSSA]] +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_POSTLOOP]] +; CHECK: loop.postloop: +; CHECK-NEXT: [[IDX_POSTLOOP]] = phi i8 [ [[IDX_NEXT_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP]] ], [ [[IDX_COPY]], [[POSTLOOP]] ] +; CHECK-NEXT: [[SUB_POSTLOOP:%.*]] = sub i8 [[N]], [[IDX_POSTLOOP]] +; CHECK-NEXT: [[CHECK_POSTLOOP:%.*]] = icmp sgt i8 [[SUB_POSTLOOP]], 2 +; CHECK-NEXT: br i1 [[CHECK_POSTLOOP]], label [[INBOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT]] +; CHECK: inbounds.postloop: +; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1 +; CHECK-NEXT: [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]] +; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP8:![0-9]+]], !irce.loop.clone [[META6]] ; entry: %n = load i8, ptr %p, !range !0 @@ -652,7 +754,7 @@ ; CHECK: inbounds.postloop: ; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1 ; CHECK-NEXT: [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]] -; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]], !irce.loop.clone !6 +; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]], !irce.loop.clone [[META6]] ; entry: %precheck = icmp sgt i8 %limit, 0 @@ -743,7 +845,7 @@ ; CHECK: inbounds.postloop: ; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1 ; CHECK-NEXT: [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]] -; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP8:![0-9]+]], !irce.loop.clone !6 +; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP10:![0-9]+]], !irce.loop.clone [[META6]] ; entry: %n = load i8, ptr %p, !range !0 @@ -769,4 +871,158 @@ ret i8 %idx; } +; IV = 0; IV -2) +; +; IRCE is allowed. +; IRCE will reassociate this range check to the 'IV < N + 2', +; since N < 126 no-overflow fact is provable at compile time. +define i8 @test_overflow_check_compile_time(i8 %limit, ptr %p) { +; CHECK-LABEL: define i8 @test_overflow_check_compile_time +; CHECK-SAME: (i8 [[LIMIT:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[N:%.*]] = load i8, ptr [[P]], align 1, !range [[RNG11:![0-9]+]] +; CHECK-NEXT: [[PRECHECK:%.*]] = icmp sgt i8 [[LIMIT]], 0 +; CHECK-NEXT: br i1 [[PRECHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i8 [[N]], 2 +; CHECK-NEXT: [[SMIN:%.*]] = call i8 @llvm.smin.i8(i8 [[LIMIT]], i8 [[TMP0]]) +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN]], i8 0) +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER3:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader3: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER3]] ] +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[N]], [[IDX]] +; CHECK-NEXT: [[CHECK:%.*]] = icmp sgt i8 [[SUB]], -2 +; CHECK-NEXT: br i1 true, label [[INBOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT4:%.*]] +; CHECK: inbounds: +; CHECK-NEXT: [[IDX_NEXT]] = add nuw i8 [[IDX]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[IDX_NEXT]], [[LIMIT]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i8 [ [[IDX_NEXT]], [[INBOUNDS]] ] +; CHECK-NEXT: [[IDX_LCSSA2:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i8 [[IDX_NEXT_LCSSA]], [[LIMIT]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i8 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: exit.loopexit.loopexit: +; CHECK-NEXT: [[IDX_LCSSA1_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP:%.*]] ] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] +; CHECK: exit.loopexit: +; CHECK-NEXT: [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX_LCSSA2]], [[MAIN_EXIT_SELECTOR]] ], [ [[IDX_LCSSA1_PH]], [[EXIT_LOOPEXIT_LOOPEXIT:%.*]] ] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[RES:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IDX_LCSSA1]], [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i8 [[RES]] +; CHECK: out_of_bounds.loopexit: +; CHECK-NEXT: [[IDX_LCSSA_PH:%.*]] = phi i8 [ [[IDX_POSTLOOP]], [[LOOP_POSTLOOP:%.*]] ] +; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +; CHECK: out_of_bounds.loopexit4: +; CHECK-NEXT: [[IDX_LCSSA_PH5:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ] +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out_of_bounds: +; CHECK-NEXT: [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX_LCSSA_PH]], [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] ], [ [[IDX_LCSSA_PH5]], [[OUT_OF_BOUNDS_LOOPEXIT4]] ] +; CHECK-NEXT: ret i8 [[IDX_LCSSA]] +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_POSTLOOP]] +; CHECK: loop.postloop: +; CHECK-NEXT: [[IDX_POSTLOOP]] = phi i8 [ [[IDX_NEXT_POSTLOOP:%.*]], [[INBOUNDS_POSTLOOP]] ], [ [[IDX_COPY]], [[POSTLOOP]] ] +; CHECK-NEXT: [[SUB_POSTLOOP:%.*]] = sub i8 [[N]], [[IDX_POSTLOOP]] +; CHECK-NEXT: [[CHECK_POSTLOOP:%.*]] = icmp sgt i8 [[SUB_POSTLOOP]], -2 +; CHECK-NEXT: br i1 [[CHECK_POSTLOOP]], label [[INBOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT]] +; CHECK: inbounds.postloop: +; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add nuw i8 [[IDX_POSTLOOP]], 1 +; CHECK-NEXT: [[CMP_POSTLOOP:%.*]] = icmp slt i8 [[IDX_NEXT_POSTLOOP]], [[LIMIT]] +; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP12:![0-9]+]], !irce.loop.clone [[META6]] +; +entry: + %n = load i8, ptr %p, !range !1 + %precheck = icmp sgt i8 %limit, 0 + br i1 %precheck, label %loop, label %exit + +loop: + %idx = phi i8 [ %idx.next, %inbounds ], [ 0, %entry ] + %sub = sub i8 %n, %idx + %check = icmp sgt i8 %sub, -2 + br i1 %check, label %inbounds, label %out_of_bounds + +inbounds: + %idx.next = add nuw i8 %idx, 1 + %cmp = icmp slt i8 %idx.next, %limit + br i1 %cmp, label %loop, label %exit + +exit: + %res = phi i8 [ 0, %entry ], [ %idx, %inbounds ] + ret i8 %res + +out_of_bounds: + ret i8 %idx; +} + +; IV = 0; IV = -2) +; +; TODO: IRCE is allowed. +; IRCE will reassociate this range check to the 'IV < (N + 2) + 1', +; since N < 126 no-overflow fact is NOT provable at compile time and +; runtime overflow check is required. +define i8 @test_overflow_check_runtime(i8 %limit, ptr %p) { +; CHECK-LABEL: define i8 @test_overflow_check_runtime +; CHECK-SAME: (i8 [[LIMIT:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[N:%.*]] = load i8, ptr [[P]], align 1, !range [[RNG11]] +; CHECK-NEXT: [[PRECHECK:%.*]] = icmp sgt i8 [[LIMIT]], 0 +; CHECK-NEXT: br i1 [[PRECHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_NEXT:%.*]], [[INBOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[N]], [[IDX]] +; CHECK-NEXT: [[CHECK:%.*]] = icmp sge i8 [[SUB]], -2 +; CHECK-NEXT: br i1 [[CHECK]], label [[INBOUNDS]], label [[OUT_OF_BOUNDS:%.*]] +; CHECK: inbounds: +; CHECK-NEXT: [[IDX_NEXT]] = add nuw i8 [[IDX]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[IDX_NEXT]], [[LIMIT]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: [[IDX_LCSSA1:%.*]] = phi i8 [ [[IDX]], [[INBOUNDS]] ] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[RES:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IDX_LCSSA1]], [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i8 [[RES]] +; CHECK: out_of_bounds: +; CHECK-NEXT: [[IDX_LCSSA:%.*]] = phi i8 [ [[IDX]], [[LOOP]] ] +; CHECK-NEXT: ret i8 [[IDX_LCSSA]] +; +entry: + %n = load i8, ptr %p, !range !1 + %precheck = icmp sgt i8 %limit, 0 + br i1 %precheck, label %loop, label %exit + +loop: + %idx = phi i8 [ %idx.next, %inbounds ], [ 0, %entry ] + %sub = sub i8 %n, %idx + %check = icmp sge i8 %sub, -2 + br i1 %check, label %inbounds, label %out_of_bounds + +inbounds: + %idx.next = add nuw i8 %idx, 1 + %cmp = icmp slt i8 %idx.next, %limit + br i1 %cmp, label %loop, label %exit + +exit: + %res = phi i8 [ 0, %entry ], [ %idx, %inbounds ] + ret i8 %res + +out_of_bounds: + ret i8 %idx; +} + !0 = !{i8 0, i8 127} +!1 = !{i8 0, i8 126}