Index: llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -112,7 +112,7 @@ cl::Hidden, cl::init(false)); static cl::opt AllowUnsignedLatchCondition("irce-allow-unsigned-latch", - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); static const char *ClonedLoopTag = "irce.loop.clone"; @@ -154,10 +154,11 @@ Value *Length = nullptr; Use *CheckUse = nullptr; RangeCheckKind Kind = RANGE_CHECK_UNKNOWN; + bool IsSigned = true; static RangeCheckKind parseRangeCheckICmp(Loop *L, ICmpInst *ICI, ScalarEvolution &SE, Value *&Index, - Value *&Length); + Value *&Length, bool &IsSigned); static void extractRangeChecksFromCond(Loop *L, ScalarEvolution &SE, Use &ConditionUse, @@ -168,6 +169,7 @@ const SCEV *getOffset() const { return Offset; } const SCEV *getScale() const { return Scale; } Value *getLength() const { return Length; } + bool isSigned() const { return IsSigned; } void print(raw_ostream &OS) const { OS << "InductiveRangeCheck:\n"; @@ -295,7 +297,7 @@ InductiveRangeCheck::RangeCheckKind InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI, ScalarEvolution &SE, Value *&Index, - Value *&Length) { + Value *&Length, bool &IsSigned) { auto IsNonNegativeAndNotLoopVarying = [&SE, L](Value *V) { const SCEV *S = SE.getSCEV(V); if (isa(S)) @@ -317,6 +319,7 @@ std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ICmpInst::ICMP_SGE: + IsSigned = true; if (match(RHS, m_ConstantInt<0>())) { Index = LHS; return RANGE_CHECK_LOWER; @@ -327,6 +330,7 @@ std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ICmpInst::ICMP_SGT: + IsSigned = true; if (match(RHS, m_ConstantInt<-1>())) { Index = LHS; return RANGE_CHECK_LOWER; @@ -343,6 +347,7 @@ std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ICmpInst::ICMP_UGT: + IsSigned = false; if (IsNonNegativeAndNotLoopVarying(LHS)) { Index = RHS; Length = LHS; @@ -375,7 +380,8 @@ const auto &RChkA = SubChecks[0]; const auto &RChkB = SubChecks[1]; if ((RChkA.Length == RChkB.Length || !RChkA.Length || !RChkB.Length) && - RChkA.Offset == RChkB.Offset && RChkA.Scale == RChkB.Scale) { + RChkA.Offset == RChkB.Offset && RChkA.Scale == RChkB.Scale && + RChkA.IsSigned == RChkB.IsSigned) { // If RChkA.Kind == RChkB.Kind then we just found two identical checks. // But if one of them is a RANGE_CHECK_LOWER and the other is a // RANGE_CHECK_UPPER (only possibility if they're different) then @@ -384,6 +390,7 @@ (InductiveRangeCheck::RangeCheckKind)(RChkA.Kind | RChkB.Kind); SubChecks[0].Length = RChkA.Length ? RChkA.Length : RChkB.Length; SubChecks[0].CheckUse = &ConditionUse; + SubChecks[0].IsSigned = RChkA.IsSigned; // We updated one of the checks in place, now erase the other. SubChecks.pop_back(); @@ -399,7 +406,8 @@ return; Value *Length = nullptr, *Index; - auto RCKind = parseRangeCheckICmp(L, ICI, SE, Index, Length); + bool IsSigned; + auto RCKind = parseRangeCheckICmp(L, ICI, SE, Index, Length, IsSigned); if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN) return; @@ -416,6 +424,7 @@ IRC.Scale = IndexAddRec->getStepRecurrence(SE); IRC.CheckUse = &ConditionUse; IRC.Kind = RCKind; + IRC.IsSigned = IsSigned; Checks.push_back(IRC); } @@ -917,9 +926,6 @@ IsSignedPredicate = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT; - // FIXME: We temporarily disable unsigned latch conditions by default - // because of found problems with intersecting signed and unsigned ranges. - // We are going to turn it on once the problems are fixed. if (!IsSignedPredicate && !AllowUnsignedLatchCondition) { FailureReason = "unsigned latch conditions are explicitly prohibited"; return None; @@ -1001,9 +1007,6 @@ IsSignedPredicate = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT; - // FIXME: We temporarily disable unsigned latch conditions by default - // because of found problems with intersecting signed and unsigned ranges. - // We are going to turn it on once the problems are fixed. if (!IsSignedPredicate && !AllowUnsignedLatchCondition) { FailureReason = "unsigned latch conditions are explicitly prohibited"; return None; @@ -1670,9 +1673,9 @@ } static Optional -IntersectRange(ScalarEvolution &SE, - const Optional &R1, - const InductiveRangeCheck::Range &R2) { +IntersectSignedRange(ScalarEvolution &SE, + const Optional &R1, + const InductiveRangeCheck::Range &R2) { if (R2.isEmpty(SE, /* IsSigned */ true)) return None; if (!R1.hasValue()) @@ -1698,6 +1701,35 @@ return Ret; } +static Optional +IntersectUnsignedRange(ScalarEvolution &SE, + const Optional &R1, + const InductiveRangeCheck::Range &R2) { + if (R2.isEmpty(SE, /* IsSigned */ false)) + return None; + if (!R1.hasValue()) + return R2; + auto &R1Value = R1.getValue(); + // We never return empty ranges from this function, and R1 is supposed to be + // a result of intersection. Thus, R1 is never empty. + assert(!R1Value.isEmpty(SE, /* IsSigned */ false) && + "We should never have empty R1!"); + + // TODO: we could widen the smaller range and have this work; but for now we + // bail out to keep things simple. + if (R1Value.getType() != R2.getType()) + return None; + + const SCEV *NewBegin = SE.getUMaxExpr(R1Value.getBegin(), R2.getBegin()); + const SCEV *NewEnd = SE.getUMinExpr(R1Value.getEnd(), R2.getEnd()); + + // If the resulting range is empty, just return None. + auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd); + if (Ret.isEmpty(SE, /* IsSigned */ false)) + return None; + return Ret; +} + bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipLoop(L)) return false; @@ -1756,11 +1788,38 @@ Instruction *ExprInsertPt = Preheader->getTerminator(); SmallVector RangeChecksToEliminate; + auto RangeIsNonNegative = [&](InductiveRangeCheck::Range &R) { + return SE.isKnownNonNegative(R.getBegin()) && + SE.isKnownNonNegative(R.getEnd()); + }; + // Basing on the type of latch predicate, we interpret the IV iteration range + // as signed or unsigned range. We use different min/max functions (signed or + // unsigned) when intersecting this range with safe iteration ranges implied + // by range checks. + auto IntersectRange = + LS.IsSignedPredicate ? IntersectSignedRange : IntersectUnsignedRange; IRBuilder<> B(ExprInsertPt); for (InductiveRangeCheck &IRC : RangeChecks) { auto Result = IRC.computeSafeIterationSpace(SE, IndVar); if (Result.hasValue()) { + // Intersecting a signed and an unsigned ranges may produce incorrect + // results because we can use neither signed nor unsigned min/max for + // reliably correct intersection if a range contains negative values + // which are either actually negative or big positive. Intersection is + // safe in two following cases: + // 1. Both ranges are signed/unsigned, then we use signed/unsigned min/max + // respectively for their intersection; + // 2. IRC safe iteration space only contains values from [0, SINT_MAX]. + // The interpretation of these values is unambiguous. + // We take the type of IV iteration range as a reference (we will + // intersect it with the resulting range of all IRC's later in + // calculateSubRanges). Only ranges of IRC of the same type are considered + // for removal unless we prove that its range doesn't contain ambiguous + // values. + if (IRC.isSigned() != LS.IsSignedPredicate && + !RangeIsNonNegative(Result.getValue())) + continue; auto MaybeSafeIterRange = IntersectRange(SE, SafeIterRange, Result.getValue()); if (MaybeSafeIterRange.hasValue()) { Index: llvm/trunk/test/Transforms/IRCE/clamp.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/clamp.ll +++ llvm/trunk/test/Transforms/IRCE/clamp.ll @@ -1,8 +1,4 @@ -; This test demonstrates the confusion in ranges: we have unsigned ranges here, -; but signed comparisons in IntersectRanges produce bad results. We temporarily -; disable it and re-enable once the unsigned ranges are supported again. -; XFAIL: * -; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -irce-allow-unsigned-latch=true -S < %s 2>&1 | FileCheck %s +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s ; The test demonstrates that incorrect behavior of Clamp may lead to incorrect ; calculation of post-loop exit condition. @@ -27,7 +23,10 @@ ; CHECK-NEXT: %length_gep.i146 = getelementptr inbounds i8, i8 addrspace(1)* undef, i64 8 ; CHECK-NEXT: %length_gep_typed.i147 = bitcast i8 addrspace(1)* undef to i32 addrspace(1)* ; CHECK-NEXT: %tmp43 = icmp ult i64 %indvars.iv.next467, %tmp21 -; CHECK-NEXT: br i1 false, label %loop.preheader, label %main.pseudo.exit +; CHECK-NEXT: [[C0:%[^ ]+]] = icmp ugt i64 %tmp21, 1 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[C0]], i64 %tmp21, i64 1 +; CHECK-NEXT: [[C1:%[^ ]+]] = icmp ult i64 1, %exit.mainloop.at +; CHECK-NEXT: br i1 [[C1]], label %loop.preheader, label %main.pseudo.exit %length_gep.i146 = getelementptr inbounds i8, i8 addrspace(1)* undef, i64 8 %length_gep_typed.i147 = bitcast i8 addrspace(1)* undef to i32 addrspace(1)* @@ -37,7 +36,7 @@ not_zero: ; preds = %in_bounds ; CHECK: not_zero: ; CHECK: %tmp56 = icmp ult i64 %indvars.iv.next, %tmp21 -; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i64 %indvars.iv.next, 1 +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i64 %indvars.iv.next, %exit.mainloop.at ; CHECK-NEXT: br i1 [[COND]], label %loop, label %main.exit.selector %tmp51 = trunc i64 %indvars.iv.next to i32 Index: llvm/trunk/test/Transforms/IRCE/correct-loop-info.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/correct-loop-info.ll +++ llvm/trunk/test/Transforms/IRCE/correct-loop-info.ll @@ -31,7 +31,7 @@ ; CHECK-NEXT: to label [[BB5:%.*]] unwind label %outer_exiting.loopexit.split-lp.loopexit.split-lp ; CHECK: bb5: ; CHECK-NEXT: [[TMP6]] = add i32 [[TMP4]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 1 ; CHECK-NEXT: br i1 true, label [[BB8]], label [[EXIT3_LOOPEXIT5:%.*]] ; CHECK: bb8: ; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP6]], 84 @@ -90,7 +90,7 @@ ; CHECK-NEXT: to label [[BB5_PRELOOP:%.*]] unwind label [[OUTER_EXITING_LOOPEXIT:%.*]] ; CHECK: bb5.preloop: ; CHECK-NEXT: [[TMP6_PRELOOP]] = add i32 [[TMP4_PRELOOP]], 1 -; CHECK-NEXT: [[TMP7_PRELOOP:%.*]] = icmp ult i32 [[TMP6_PRELOOP]], 1 +; CHECK-NEXT: [[TMP7_PRELOOP:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], 1 ; CHECK-NEXT: br i1 [[TMP7_PRELOOP]], label [[BB8_PRELOOP]], label [[EXIT3_LOOPEXIT:%.*]] ; CHECK: bb8.preloop: ; CHECK-NEXT: [[TMP9_PRELOOP:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], 84 @@ -112,7 +112,7 @@ ; CHECK-NEXT: to label [[BB5_POSTLOOP:%.*]] unwind label %outer_exiting.loopexit.split-lp.loopexit ; CHECK: bb5.postloop: ; CHECK-NEXT: [[TMP6_POSTLOOP]] = add i32 [[TMP4_POSTLOOP]], 1 -; CHECK-NEXT: [[TMP7_POSTLOOP:%.*]] = icmp ult i32 [[TMP6_POSTLOOP]], 1 +; CHECK-NEXT: [[TMP7_POSTLOOP:%.*]] = icmp slt i32 [[TMP6_POSTLOOP]], 1 ; CHECK-NEXT: br i1 [[TMP7_POSTLOOP]], label [[BB8_POSTLOOP]], label [[EXIT3_LOOPEXIT4:%.*]] ; CHECK: bb8.postloop: ; CHECK-NEXT: [[TMP9_POSTLOOP:%.*]] = icmp slt i32 [[TMP6_POSTLOOP]], 84 @@ -135,7 +135,7 @@ bb5: ; preds = %innerheader %tmp6 = add i32 %tmp4, 1 - %tmp7 = icmp ult i32 %tmp6, 1 + %tmp7 = icmp slt i32 %tmp6, 1 br i1 %tmp7, label %bb8, label %exit3 bb8: ; preds = %bb5 Index: llvm/trunk/test/Transforms/IRCE/empty_ranges.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/empty_ranges.ll +++ llvm/trunk/test/Transforms/IRCE/empty_ranges.ll @@ -1,5 +1,4 @@ -; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -irce-allow-unsigned-latch=true -S - +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S ; Make sure that IRCE doesn't apply in case of empty ranges. ; (i + 30 < 40) if i in [-30, 10). Index: llvm/trunk/test/Transforms/IRCE/eq_ne.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/eq_ne.ll +++ llvm/trunk/test/Transforms/IRCE/eq_ne.ll @@ -1,4 +1,4 @@ -; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -irce-allow-unsigned-latch=true -S < %s 2>&1 | FileCheck %s +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s ; CHECK: irce: in function test_01: constrained Loop at depth 1 containing: %loop
,%in.bounds ; CHECK: irce: in function test_01u: constrained Loop at depth 1 containing: %loop
,%in.bounds Index: llvm/trunk/test/Transforms/IRCE/range_intersect_miscompile.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/range_intersect_miscompile.ll +++ llvm/trunk/test/Transforms/IRCE/range_intersect_miscompile.ll @@ -1,14 +1,39 @@ -; RUN: opt -irce -S < %s 2>&1 | FileCheck %s +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s -; This test demonstrates a miscompile: the outer loop's IV iterates in range of -; [2, 400) and the range check is done against value 331. Due to a bug in range -; intersection IRCE manages to eliminate the range check without inserting a -; postloop, which is incorrect. So far IRCE is prohibited for this case. +; CHECK-LABEL: irce: in function test_01: constrained Loop at depth 1 containing: +; CHECK-LABEL: irce: in function test_02: constrained Loop at depth 1 containing: +; CHECK-NOT: irce: in function test_03: constrained Loop +; CHECK-NOT: irce: in function test_04: constrained Loop +; CHECK-LABEL: irce: in function test_05: constrained Loop at depth 1 containing: + +; This test used to demonstrate a miscompile: the outer loop's IV iterates in +; range of [2, 400) and the range check is done against value 331. Due to a bug +; in range intersection IRCE manages to eliminate the range check without +; inserting a postloop, which is incorrect. We treat the range of this test as +; an unsigned range and are able to intersect ranges correctly and insert a +; postloop. define void @test_01() { ; CHECK-LABEL: test_01 -; CHECK-NOT: br i1 true +; CHECK-NOT: preloop +; CHECK: range_check_block: ; preds = %inner_loop +; CHECK-NEXT: %range_check = icmp slt i32 %iv, 331 +; CHECK-NEXT: br i1 true, label %loop_latch +; CHECK: loop_latch: +; CHECK-NEXT: %iv_next = add i32 %iv, 1 +; CHECK-NEXT: %loop_cond = icmp ult i32 %iv_next, 400 +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 %iv_next, 331 +; CHECK-NEXT: br i1 [[COND]], label %loop_header, label %main.exit.selector +; CHECK: main.exit.selector: ; preds = %loop_latch +; CHECK-NEXT: %iv_next.lcssa = phi i32 [ %iv_next, %loop_latch ] +; CHECK-NEXT: %iv.lcssa = phi i32 [ %iv, %loop_latch ] +; CHECK-NEXT: [[MES_COND:%[^ ]+]] = icmp ult i32 %iv_next.lcssa, 400 +; CHECK-NEXT: br i1 [[MES_COND]], label %main.pseudo.exit, label %exit +; CHECK: loop_latch.postloop: ; preds = %range_check_block.postloop +; CHECK-NEXT: %iv_next.postloop = add i32 %iv.postloop, 1 +; CHECK-NEXT: %loop_cond.postloop = icmp ult i32 %iv_next.postloop, 400 +; CHECK-NEXT: br i1 %loop_cond.postloop, label %loop_header.postloop, label %exit.loopexit entry: br label %loop_header @@ -43,3 +68,204 @@ deopt: ; preds = %range_check_block ret void } + +; Similar to test_01, but here the range check is done against 450. No postloop +; is required. + +define void @test_02() { + +; CHECK-LABEL: test_02 +; CHECK-NOT: preloop +; CHECK-NOT: postloop +; CHECK: range_check_block: ; preds = %inner_loop +; CHECK-NEXT: %range_check = icmp slt i32 %iv, 450 +; CHECK-NEXT: br i1 true, label %loop_latch +; CHECK: loop_latch: ; preds = %range_check_block +; CHECK-NEXT: %iv_next = add i32 %iv, 1 +; CHECK-NEXT: %loop_cond = icmp ult i32 %iv_next, 400 +; CHECK-NEXT: br i1 %loop_cond, label %loop_header, label %exit + +entry: + br label %loop_header + +loop_header: ; preds = %loop_latch, %entry + %iv = phi i32 [ 2, %entry ], [ %iv_next, %loop_latch ] + %iv.prev = phi i32 [ 1, %entry ], [ %iv, %loop_latch ] + %tmp2 = icmp sgt i32 %iv.prev, -1 + br i1 %tmp2, label %loop_header.split.us, label %exit + +loop_header.split.us: ; preds = %loop_header + br label %inner_loop + +inner_loop: ; preds = %inner_loop, %loop_header.split.us + %inner_iv = phi i32 [ 1, %loop_header.split.us ], [ %inner_iv_next, %inner_loop ] + %inner_iv_next = add nuw nsw i32 %inner_iv, 1 + %inner_cond = icmp ult i32 %inner_iv_next, 31 + br i1 %inner_cond, label %inner_loop, label %range_check_block + +exit: ; preds = %loop_latch, %loop_header + ret void + +range_check_block: ; preds = %inner_loop + %range_check = icmp slt i32 %iv, 450 + br i1 %range_check, label %loop_latch, label %deopt + +loop_latch: ; preds = %range_check_block + %iv_next = add i32 %iv, 1 + %loop_cond = icmp ult i32 %iv_next, 400 + br i1 %loop_cond, label %loop_header, label %exit + +deopt: ; preds = %range_check_block + ret void +} + +; Range check is made against 0, so the safe iteration range is empty. IRCE +; should not apply. + +define void @test_03() { + +; CHECK-LABEL: test_03 + +entry: + br label %loop_header + +loop_header: ; preds = %loop_latch, %entry + %iv = phi i32 [ 2, %entry ], [ %iv_next, %loop_latch ] + %iv.prev = phi i32 [ 1, %entry ], [ %iv, %loop_latch ] + %tmp2 = icmp sgt i32 %iv.prev, -1 + br i1 %tmp2, label %loop_header.split.us, label %exit + +loop_header.split.us: ; preds = %loop_header + br label %inner_loop + +inner_loop: ; preds = %inner_loop, %loop_header.split.us + %inner_iv = phi i32 [ 1, %loop_header.split.us ], [ %inner_iv_next, %inner_loop ] + %inner_iv_next = add nuw nsw i32 %inner_iv, 1 + %inner_cond = icmp ult i32 %inner_iv_next, 31 + br i1 %inner_cond, label %inner_loop, label %range_check_block + +exit: ; preds = %loop_latch, %loop_header + ret void + +range_check_block: ; preds = %inner_loop + %range_check = icmp slt i32 %iv, 0 + br i1 %range_check, label %loop_latch, label %deopt + +loop_latch: ; preds = %range_check_block + %iv_next = add i32 %iv, 1 + %loop_cond = icmp ult i32 %iv_next, 400 + br i1 %loop_cond, label %loop_header, label %exit + +deopt: ; preds = %range_check_block + ret void +} + +; We do not know whether %n is positive or negative, so we prohibit IRCE in +; order to avoid incorrect intersection of signed and unsigned ranges. + +define void @test_04(i32* %p) { + +; CHECK-LABEL: test_04 + +entry: + %n = load i32, i32* %p + br label %loop_header + +loop_header: ; preds = %loop_latch, %entry + %iv = phi i32 [ 2, %entry ], [ %iv_next, %loop_latch ] + %iv.prev = phi i32 [ 1, %entry ], [ %iv, %loop_latch ] + %tmp2 = icmp sgt i32 %iv.prev, -1 + br i1 %tmp2, label %loop_header.split.us, label %exit + +loop_header.split.us: ; preds = %loop_header + br label %inner_loop + +inner_loop: ; preds = %inner_loop, %loop_header.split.us + %inner_iv = phi i32 [ 1, %loop_header.split.us ], [ %inner_iv_next, %inner_loop ] + %inner_iv_next = add nuw nsw i32 %inner_iv, 1 + %inner_cond = icmp ult i32 %inner_iv_next, 31 + br i1 %inner_cond, label %inner_loop, label %range_check_block + +exit: ; preds = %loop_latch, %loop_header + ret void + +range_check_block: ; preds = %inner_loop + %range_check = icmp slt i32 %iv, %n + br i1 %range_check, label %loop_latch, label %deopt + +loop_latch: ; preds = %range_check_block + %iv_next = add i32 %iv, 1 + %loop_cond = icmp ult i32 %iv_next, 400 + br i1 %loop_cond, label %loop_header, label %exit + +deopt: ; preds = %range_check_block + ret void +} + +; Same as test_04, but range guarantees that %n is positive. So we can safely +; intersect ranges (with insertion of postloop). + +define void @test_05(i32* %p) { + +; CHECK-LABEL: test_05 +; CHECK-NOT: preloop +; CHECK: entry: +; CHECK-NEXT: %n = load i32, i32* %p, !range !6 +; CHECK-NEXT: [[CMP_1:%[^ ]+]] = icmp ugt i32 %n, 2 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP_1]], i32 %n, i32 2 +; CHECK-NEXT: [[CMP_2:%[^ ]+]] = icmp ult i32 2, %exit.mainloop.at +; CHECK-NEXT: br i1 [[CMP_2]], label %loop_header.preheader, label %main.pseudo.exit +; CHECK: range_check_block: ; preds = %inner_loop +; CHECK-NEXT: %range_check = icmp slt i32 %iv, %n +; CHECK-NEXT: br i1 true, label %loop_latch, label %deopt.loopexit2 +; CHECK: loop_latch: ; preds = %range_check_block +; CHECK-NEXT: %iv_next = add i32 %iv, 1 +; CHECK-NEXT: %loop_cond = icmp ult i32 %iv_next, 400 +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 %iv_next, %exit.mainloop.at +; CHECK-NEXT: br i1 [[COND]], label %loop_header, label %main.exit.selector +; CHECK: main.exit.selector: ; preds = %loop_latch +; CHECK-NEXT: %iv_next.lcssa = phi i32 [ %iv_next, %loop_latch ] +; CHECK-NEXT: %iv.lcssa = phi i32 [ %iv, %loop_latch ] +; CHECK-NEXT: [[MES_COND:%[^ ]+]] = icmp ult i32 %iv_next.lcssa, 400 +; CHECK-NEXT: br i1 [[MES_COND]], label %main.pseudo.exit, label %exit +; CHECK: loop_latch.postloop: ; preds = %range_check_block.postloop +; CHECK-NEXT: %iv_next.postloop = add i32 %iv.postloop, 1 +; CHECK-NEXT: %loop_cond.postloop = icmp ult i32 %iv_next.postloop, 400 +; CHECK-NEXT: br i1 %loop_cond.postloop, label %loop_header.postloop, label %exit.loopexit + +entry: + %n = load i32, i32* %p, !range !0 + br label %loop_header + +loop_header: ; preds = %loop_latch, %entry + %iv = phi i32 [ 2, %entry ], [ %iv_next, %loop_latch ] + %iv.prev = phi i32 [ 1, %entry ], [ %iv, %loop_latch ] + %tmp2 = icmp sgt i32 %iv.prev, -1 + br i1 %tmp2, label %loop_header.split.us, label %exit + +loop_header.split.us: ; preds = %loop_header + br label %inner_loop + +inner_loop: ; preds = %inner_loop, %loop_header.split.us + %inner_iv = phi i32 [ 1, %loop_header.split.us ], [ %inner_iv_next, %inner_loop ] + %inner_iv_next = add nuw nsw i32 %inner_iv, 1 + %inner_cond = icmp ult i32 %inner_iv_next, 31 + br i1 %inner_cond, label %inner_loop, label %range_check_block + +exit: ; preds = %loop_latch, %loop_header + ret void + +range_check_block: ; preds = %inner_loop + %range_check = icmp slt i32 %iv, %n + br i1 %range_check, label %loop_latch, label %deopt + +loop_latch: ; preds = %range_check_block + %iv_next = add i32 %iv, 1 + %loop_cond = icmp ult i32 %iv_next, 400 + br i1 %loop_cond, label %loop_header, label %exit + +deopt: ; preds = %range_check_block + ret void +} + +!0 = !{i32 0, i32 50} Index: llvm/trunk/test/Transforms/IRCE/stride_more_than_1.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/stride_more_than_1.ll +++ llvm/trunk/test/Transforms/IRCE/stride_more_than_1.ll @@ -1,4 +1,4 @@ -; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -irce-allow-unsigned-latch=true -S < %s 2>&1 | FileCheck %s +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s ; CHECK: irce: in function test_01: constrained Loop at depth 1 containing: %loop
,%in.bounds ; CHECK: irce: in function test_02: constrained Loop at depth 1 containing: %loop
,%in.bounds Index: llvm/trunk/test/Transforms/IRCE/unsigned_comparisons_ugt.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/unsigned_comparisons_ugt.ll +++ llvm/trunk/test/Transforms/IRCE/unsigned_comparisons_ugt.ll @@ -1,4 +1,4 @@ -; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -irce-allow-unsigned-latch=true -S < %s 2>&1 | FileCheck %s +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s ; CHECK: irce: in function test_01: constrained Loop at depth 1 containing: %loop
,%in.bounds ; CHECK: irce: in function test_02: constrained Loop at depth 1 containing: %loop
,%in.bounds Index: llvm/trunk/test/Transforms/IRCE/unsigned_comparisons_ult.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/unsigned_comparisons_ult.ll +++ llvm/trunk/test/Transforms/IRCE/unsigned_comparisons_ult.ll @@ -1,4 +1,4 @@ -; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -irce-allow-unsigned-latch=true -S < %s 2>&1 | FileCheck %s +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s ; CHECK: irce: in function test_01: constrained Loop at depth 1 containing: %loop
,%in.bounds ; CHECK: irce: in function test_02: constrained Loop at depth 1 containing: %loop
,%in.bounds @@ -7,6 +7,8 @@ ; CHECK: irce: in function test_05: constrained Loop at depth 1 containing: %loop
,%in.bounds ; CHECK: irce: in function test_06: constrained Loop at depth 1 containing: %loop
,%in.bounds ; CHECK-NOT: irce: in function test_07: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_08: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK-NOT: irce: in function test_09: constrained Loop at depth 1 containing: %loop
,%in.bounds ; ULT condition for increasing loop. define void @test_01(i32* %arr, i32* %a_len_ptr) #0 { @@ -305,4 +307,84 @@ ret void } +; Unsigned walking through signed border is allowed. +; Iteration space [0; UINT_MAX - 99), the fact that SINT_MAX is within this +; range does not prevent us from performing IRCE. + +define void @test_08(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_08 +; CHECK: entry: +; CHECK-NEXT: %exit.mainloop.at = load i32, i32* %a_len_ptr, !range !0 +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at +; CHECK-NEXT: br i1 [[COND]], label %loop.preheader, label %main.pseudo.exit +; CHECK: loop: +; CHECK-NEXT: %idx = phi i32 [ %idx.next, %in.bounds ], [ 0, %loop.preheader ] +; CHECK-NEXT: %idx.next = add i32 %idx, 1 +; CHECK-NEXT: %abc = icmp ult i32 %idx, %exit.mainloop.at +; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1 +; CHECK-NOT: loop.preloop: +; CHECK: loop.postloop: +; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ] +; CHECK-NEXT: %idx.next.postloop = add i32 %idx.postloop, 1 +; CHECK-NEXT: %abc.postloop = icmp ult i32 %idx.postloop, %exit.mainloop.at +; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx.next, -100 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Walking through the border of unsigned range is not allowed +; (iteration space [-100; 100)). Negative test. + +define void @test_09(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_09 +; CHECK-NOT: preloop +; CHECK-NOT: postloop +; CHECK-NOT: br i1 false +; CHECK-NOT: br i1 true + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ -100, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx.next, 100 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + !0 = !{i32 0, i32 50}