Index: lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -228,7 +228,8 @@ /// check is redundant and can be constant-folded away. The induction /// variable is not required to be the canonical {0,+,1} induction variable. Optional computeSafeIterationSpace(ScalarEvolution &SE, - const SCEVAddRecExpr *IndVar) const; + const SCEVAddRecExpr *IndVar, + bool IsLatchSigned) const; /// Parse out a set of inductive range checks from \p BI and append them to \p /// Checks. @@ -1599,7 +1600,8 @@ /// range, returns None. Optional InductiveRangeCheck::computeSafeIterationSpace( - ScalarEvolution &SE, const SCEVAddRecExpr *IndVar) const { + ScalarEvolution &SE, const SCEVAddRecExpr *IndVar, + bool IsLatchSigned) const { // IndVar is of the form "A + B * I" (where "I" is the canonical induction // variable, that may or may not exist as a real llvm::Value in the loop) and // this inductive range check is a range check on the "C + D * I" ("C" is @@ -1610,21 +1612,15 @@ // // 0 <= M + 1 * IndVar < L given L >= 0 (i.e. N == 1) // - // The inequality is satisfied by -M <= IndVar < (L - M) [^1]. All additions - // and subtractions are twos-complement wrapping and comparisons are signed. - // - // Proof: - // - // If there exists IndVar such that -M <= IndVar < (L - M) then it follows - // that -M <= (-M + L) [== Eq. 1]. Since L >= 0, if (-M + L) sign-overflows - // then (-M + L) < (-M). Hence by [Eq. 1], (-M + L) could not have - // overflown. - // - // This means IndVar = t + (-M) for t in [0, L). Hence (IndVar + M) = t. - // Hence 0 <= (IndVar + M) < L - - // [^1]: Note that the solution does _not_ apply if L < 0; consider values M = - // 127, IndVar = 126 and L = -2 in an i8 world. + // Here L stands for upper limit of the safe iteration space. + // The inequality is satisfied by (0 - M) <= IndVar < (L - M). To avoid + // overflows when calculating (0 - M) and (L - M) we, depending on type of + // IV's iteration space, limit the calculations by borders of the iteration + // space. For example, if IndVar is unsigned, (0 - M) overflows for any M > 0. + // If we figured out that "anything greater than (-M) is safe", we strengthen + // this to "everything greater than 0 is safe", assuming that values between + // -M and 0 just do not exist in unsigned iteration space, and we don't want + // to deal with overflown values. if (!IndVar->isAffine()) return None; @@ -1641,22 +1637,64 @@ return None; assert(!D->getValue()->isZero() && "Recurrence with zero step?"); + unsigned BitWidth = cast(IndVar->getType())->getBitWidth(); + const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth)); + // Substract Y from X so that it does not go through border of the IV + // iteration space. Mathematically, it is equivalent to: + // + // Substract(X, Y) = min(max(X - Y, INT_MIN), INT_MAX). [1] + // + // In [1], 'X - Y' is a mathematical substraction (result is not bounded to + // any width of bit grid). But after we take min/max, the result is + // guaranteed to be within [INT_MIN, INT_MAX]. + // + // In [1], INT_MAX and INT_MIN are respectively signed and unsigned max/min + // values, depending on type of latch condition that defines IV iteration + // space. + auto Substract = [&](const SCEV *X, const SCEV *Y) { + assert(SE.isKnownNonNegative(X) && + "We can only substract from values in [0; SINT_MAX]!"); + if (IsLatchSigned) { + // X is a number from signed range, Y is interpreted as signed. + // Even if Y is SINT_MAX, (X - Y) does not reach SINT_MIN. So the only + // thing we should care about is that we didn't cross SINT_MAX. + // So, if Y is positive, we substract Y safely. + // Rule 1: Y > 0 ---> Y. + // If 0 <= -Y <= (SINT_MAX - X), we substract Y safely. + // Rule 2: Y >=s (X - SINT_MAX) ---> Y. + // If 0 <= (SINT_MAX - X) < -Y, we can only substract (X - SINT_MAX). + // Rule 3: Y (X - SINT_MAX). + // It gives us smax(Y, X - SINT_MAX) to substract in all cases. + const SCEV *XMinusSIntMax = SE.getMinusSCEV(X, SIntMax); + return SE.getMinusSCEV(X, SE.getSMaxExpr(Y, XMinusSIntMax)); + } else + // X is a number from unsigned range, Y is interpreted as signed. + // Even if Y is SINT_MIN, (X - Y) does not reach UINT_MAX. So the only + // thing we should care about is that we didn't cross zero. + // So, if Y is negative, we substract Y safely. + // Rule 1: Y Y. + // If 0 <= Y <= X, we substract Y safely. + // Rule 2: Y <=s X ---> Y. + // If 0 <= X < Y, we should stop at 0 and can only substract X. + // Rule 3: Y >s X ---> X. + // It gives us smin(X, Y) to substract in all cases. + return SE.getMinusSCEV(X, SE.getSMinExpr(X, Y)); + }; const SCEV *M = SE.getMinusSCEV(C, A); - const SCEV *Begin = SE.getNegativeSCEV(M); - const SCEV *UpperLimit = nullptr; + const SCEV *Zero = SE.getZero(M->getType()); + const SCEV *Begin = Substract(Zero, M); + const SCEV *L = nullptr; // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L". // We can potentially do much better here. - if (const SCEV *L = getEnd()) - UpperLimit = L; + if (const SCEV *EndLimit = getEnd()) + L = EndLimit; else { assert(Kind == InductiveRangeCheck::RANGE_CHECK_LOWER && "invariant!"); - unsigned BitWidth = cast(IndVar->getType())->getBitWidth(); - UpperLimit = SE.getConstant(APInt::getSignedMaxValue(BitWidth)); + L = SIntMax; } - - const SCEV *End = SE.getMinusSCEV(UpperLimit, M); + const SCEV *End = Substract(L, M); return InductiveRangeCheck::Range(Begin, End); } @@ -1776,10 +1814,6 @@ Instruction *ExprInsertPt = Preheader->getTerminator(); SmallVector RangeChecksToEliminate; - auto RangeIsNonNegative = [&](InductiveRangeCheck::Range &R) { - return SE.isKnownNonNegative(R.getBegin()) && - SE.isKnownNonNegative(R.getEnd()); - }; // Basing on the type of latch predicate, we interpret the IV iteration range // as signed or unsigned range. We use different min/max functions (signed or // unsigned) when intersecting this range with safe iteration ranges implied @@ -1789,25 +1823,9 @@ IRBuilder<> B(ExprInsertPt); for (InductiveRangeCheck &IRC : RangeChecks) { - auto Result = IRC.computeSafeIterationSpace(SE, IndVar); + auto Result = IRC.computeSafeIterationSpace(SE, IndVar, + LS.IsSignedPredicate); if (Result.hasValue()) { - // Intersecting a signed and an unsigned ranges may produce incorrect - // results because we can use neither signed nor unsigned min/max for - // reliably correct intersection if a range contains negative values - // which are either actually negative or big positive. Intersection is - // safe in two following cases: - // 1. Both ranges are signed/unsigned, then we use signed/unsigned min/max - // respectively for their intersection; - // 2. IRC safe iteration space only contains values from [0, SINT_MAX]. - // The interpretation of these values is unambiguous. - // We take the type of IV iteration range as a reference (we will - // intersect it with the resulting range of all IRC's later in - // calculateSubRanges). Only ranges of IRC of the same type are considered - // for removal unless we prove that its range doesn't contain ambiguous - // values. - if (IRC.isSigned() != LS.IsSignedPredicate && - !RangeIsNonNegative(Result.getValue())) - continue; auto MaybeSafeIterRange = IntersectRange(SE, SafeIterRange, Result.getValue()); if (MaybeSafeIterRange.hasValue()) { Index: test/Transforms/IRCE/clamp.ll =================================================================== --- test/Transforms/IRCE/clamp.ll +++ test/Transforms/IRCE/clamp.ll @@ -4,7 +4,7 @@ ; calculation of post-loop exit condition. ; CHECK-LABEL: irce: in function test_01: constrained Loop at depth 1 containing: %loop
,%in_bounds,%not_zero -; CHECK-LABEL: irce: in function test_02: constrained Loop at depth 1 containing: %loop
,%in_bounds +; CHECK-NOT: irce: in function test_02: constrained Loop define void @test_01() { @@ -69,42 +69,9 @@ define void @test_02() { -; CHECK-LABEL: test_02 -; CHECK-NOT: br i1 false, label %loop.preloop.preheader, label %preloop.pseudo.exit -; CHECK-NOT: postloop -; CHECK: entry: -; CHECK-NEXT: br i1 true, label %loop.preloop.preheader, label %preloop.pseudo.exit -; CHECK: mainloop: -; CHECK-NEXT: br label %loop -; CHECK: loop: -; CHECK-NEXT: %iv1 = phi i64 [ %iv1.preloop.copy, %mainloop ], [ %iv1.next, %in_bounds ] -; CHECK-NEXT: %iv2 = phi i64 [ %iv2.preloop.copy, %mainloop ], [ %iv2.next, %in_bounds ] -; CHECK-NEXT: %iv2.offset = add i64 %iv2, 1 -; CHECK-NEXT: %rc = icmp ult i64 %iv2.offset, 400 -; CHECK-NEXT: br i1 true, label %in_bounds, label %bci_321.loopexit1 -; CHECK: in_bounds: -; CHECK-NEXT: %iv1.next = add nuw nsw i64 %iv1, 2 -; CHECK-NEXT: %iv2.next = add nuw nsw i64 %iv2, 2 -; CHECK-NEXT: %cond = icmp ugt i64 %iv1, 204 -; CHECK-NEXT: br i1 %cond, label %bci_321.loopexit1, label %loop -; CHECK: loop.preloop: -; CHECK-NEXT: %iv1.preloop = phi i64 [ %iv1.next.preloop, %in_bounds.preloop ], [ 3, %loop.preloop.preheader ] -; CHECK-NEXT: %iv2.preloop = phi i64 [ %iv2.next.preloop, %in_bounds.preloop ], [ 4294967295, %loop.preloop.preheader ] -; CHECK-NEXT: %iv2.offset.preloop = add i64 %iv2.preloop, 1 -; CHECK-NEXT: %rc.preloop = icmp ult i64 %iv2.offset.preloop, 400 -; CHECK-NEXT: br i1 %rc.preloop, label %in_bounds.preloop, label %bci_321.loopexit -; CHECK: in_bounds.preloop: -; CHECK-NEXT: %iv1.next.preloop = add nuw nsw i64 %iv1.preloop, 2 -; CHECK-NEXT: %iv2.next.preloop = add nuw nsw i64 %iv2.preloop, 2 -; CHECK-NEXT: %cond.preloop = icmp ugt i64 %iv1.preloop, 204 -; CHECK-NEXT: [[C0:%[^ ]+]] = icmp ult i64 %iv1.preloop, 205 -; CHECK-NEXT: [[C1:%[^ ]+]] = xor i1 [[C0]], true -; CHECK-NEXT: br i1 [[C1]], label %preloop.exit.selector, label %loop.preloop -; CHECK: preloop.pseudo.exit: -; CHECK-NEXT: %iv1.preloop.copy = phi i64 [ 3, %entry ], [ %iv1.next.preloop.lcssa, %preloop.exit.selector ] -; CHECK-NEXT: %iv2.preloop.copy = phi i64 [ 4294967295, %entry ], [ %iv2.next.preloop.lcssa, %preloop.exit.selector ] -; CHECK-NEXT: %indvar.end = phi i64 [ 1, %entry ], [ %iv1.preloop.lcssa, %preloop.exit.selector ] -; CHECK-NEXT: br label %mainloop +; Now IRCE is smart enough to understand that the safe range here is empty. +; Previously it executed the entire loop in safe preloop and never actually +; entered the main loop. entry: br label %loop Index: test/Transforms/IRCE/range_intersect_miscompile.ll =================================================================== --- test/Transforms/IRCE/range_intersect_miscompile.ll +++ test/Transforms/IRCE/range_intersect_miscompile.ll @@ -2,8 +2,8 @@ ; CHECK-LABEL: irce: in function test_01: constrained Loop at depth 1 containing: ; CHECK-LABEL: irce: in function test_02: constrained Loop at depth 1 containing: -; CHECK-NOT: irce: in function test_03: constrained Loop -; CHECK-NOT: irce: in function test_04: constrained Loop +; CHECK-LABEL: irce: in function test_03: constrained Loop at depth 1 containing: +; CHECK-LABEL: irce: in function test_04: constrained Loop at depth 1 containing: ; CHECK-LABEL: irce: in function test_05: constrained Loop at depth 1 containing: ; This test used to demonstrate a miscompile: the outer loop's IV iterates in @@ -120,11 +120,18 @@ } ; Range check is made against 0, so the safe iteration range is empty. IRCE -; should not apply. +; should not apply to the inner loop. The condition %tmp2 can be eliminated. define void @test_03() { ; CHECK-LABEL: test_03 +; CHECK-NOT: preloop +; CHECK-NOT: postloop +; CHECK: %tmp2 = icmp sgt i32 %iv.prev, -1 +; CHECK-NEXT: br i1 true, label %loop_header.split.us, label %exit +; CHECK: range_check_block: +; CHECK-NEXT: %range_check = icmp slt i32 %iv, 0 +; CHECK-NEXT: br i1 %range_check, label %loop_latch, label %deopt entry: br label %loop_header @@ -162,10 +169,18 @@ ; We do not know whether %n is positive or negative, so we prohibit IRCE in ; order to avoid incorrect intersection of signed and unsigned ranges. +; The condition %tmp2 can be eliminated. define void @test_04(i32* %p) { ; CHECK-LABEL: test_04 +; CHECK-NOT: preloop +; CHECK-NOT: postloop +; CHECK: %tmp2 = icmp sgt i32 %iv.prev, -1 +; CHECK-NEXT: br i1 true, label %loop_header.split.us, label %exit +; CHECK: range_check_block: +; CHECK-NEXT: %range_check = icmp slt i32 %iv, %n +; CHECK-NEXT: br i1 %range_check, label %loop_latch, label %deopt entry: %n = load i32, i32* %p Index: test/Transforms/IRCE/ranges_of_different_types.ll =================================================================== --- test/Transforms/IRCE/ranges_of_different_types.ll +++ test/Transforms/IRCE/ranges_of_different_types.ll @@ -0,0 +1,427 @@ +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s + +; Make sure we can eliminate range check with signed latch, unsigned IRC and +; positive offset. The safe iteration space is: +; No preloop, +; %exit.mainloop.at = smax (0, -1 - smax(12 - %len, -102)). +; Formula verification: +; %len = 10 +; %exit.mainloop.at = 0 +; %len = 50 +; %exit.mainloop.at = 50 - 13 = 37. +; %len = 100 +; %exit.mainloop.at = 100 - 13 = 87. +; %len = 150 +; %exit.mainloop.at = 101. +; %len = SINT_MAX +; %exit.mainloop.at = 101 + +define void @test_01(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK-LABEL: test_01( +; CHECK-NOT: preloop +; CHECK: entry: +; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 12, %len +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 +; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 +; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]] +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0 +; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at +; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit +; CHECK: loop +; CHECK: br i1 true, label %in.bounds +; CHECK: postloop: + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %idx.offset = add i32 %idx, 13 + %abc = icmp ult i32 %idx.offset, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp slt i32 %idx.next, 101 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Make sure we can eliminate range check with signed latch, unsigned IRC and +; negative offset. The safe iteration space is: +; %exit.preloop.at = 13 +; %exit.mainloop.at = smax(-1 - smax(smax(%len - SINT_MAX, -13) - 1 - %len, -102), 0) +; Formula verification: +; %len = 10 +; %exit.mainloop.at = 0 +; %len = 50 +; %exit.mainloop.at = 63 +; %len = 100 +; %exit.mainloop.at = 101 +; %len = 150 +; %exit.mainloop.at = 101 +; %len = SINT_MAX +; %exit.mainloop.at = 101 + +define void @test_02(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK-LABEL: test_02( +; CHECK: entry: +; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647 +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13 +; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13 +; CHECK-NEXT: [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 +; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102 +; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]] +; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0 +; CHECK-NEXT: br i1 true, label %loop.preloop.preheader +; CHECK: loop.preloop: +; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 0, %loop.preloop.preheader ] +; CHECK-NEXT: %idx.next.preloop = add i32 %idx.preloop, 1 +; CHECK-NEXT: %idx.offset.preloop = sub i32 %idx.preloop, 13 +; CHECK-NEXT: %abc.preloop = icmp ult i32 %idx.offset.preloop, %len +; CHECK-NEXT: br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit +; CHECK: in.bounds.preloop: +; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop +; CHECK-NEXT: store i32 0, i32* %addr.preloop +; CHECK-NEXT: %next.preloop = icmp slt i32 %idx.next.preloop, 101 +; CHECK-NEXT: [[PRELOOP_COND:%[^ ]+]] = icmp slt i32 %idx.next.preloop, 13 +; CHECK-NEXT: br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector +; CHECK: postloop: + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %idx.offset = sub i32 %idx, 13 + %abc = icmp ult i32 %idx.offset, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp slt i32 %idx.next, 101 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Make sure we can eliminate range check with unsigned latch, signed IRC and +; positive offset. The safe iteration space is: +; No preloop, +; %exit.mainloop.at = -1 - umax(-2 - %len - smax(-1 - %len, -14), -102) +; Formula verification: +; %len = 10 +; %exit.mainloop.at = 0 +; %len = 50 +; %exit.mainloop.at = 37 +; %len = 100 +; %exit.mainloop.at = 87 +; %len = 150 +; %exit.mainloop.at = 101 +; %len = SINT_MAX +; %exit.mainloop.at = 101 + +define void @test_03(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK-LABEL: test_03( +; CHECK-NOT: preloop +; CHECK: entry: +; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len +; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14 +; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14 +; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]] +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102 +; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102 +; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] +; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at +; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit +; CHECK: postloop: + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %idx.offset = add i32 %idx, 13 + %abc = icmp slt i32 %idx.offset, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx.next, 101 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Make sure we can eliminate range check with unsigned latch, signed IRC and +; positive offset. The safe iteration space is: +; %exit.preloop.at = 13 +; %exit.mainloop.at = -1 - umax(-14 - %len, -102) +; Formula verification: +; %len = 10 +; %exit.mainloop.at = 23 +; %len = 50 +; %exit.mainloop.at = 63 +; %len = 100 +; %exit.mainloop.at = 101 +; %len = 150 +; %exit.mainloop.at = 101 +; %len = SINT_MAX +; %exit.mainloop.at = 101 + +define void @test_04(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK-LABEL: test_04( +; CHECK: entry: +; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -14, %len +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102 +; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 +; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] +; CHECK-NEXT: br i1 true, label %loop.preloop.preheader +; CHECK: in.bounds.preloop: +; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop +; CHECK-NEXT: store i32 0, i32* %addr.preloop +; CHECK-NEXT: %next.preloop = icmp ult i32 %idx.next.preloop, 101 +; CHECK-NEXT: [[PRELOOP_COND:%[^ ]+]] = icmp ult i32 %idx.next.preloop, 13 +; CHECK-NEXT: br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector +; CHECK: postloop: + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %idx.offset = sub i32 %idx, 13 + %abc = icmp slt i32 %idx.offset, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx.next, 101 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Signed latch, signed RC, positive offset. Same as test_01. +define void @test_05(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK-LABEL: test_05( +; CHECK-NOT: preloop +; CHECK: entry: +; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 12, %len +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 +; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 +; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]] +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0 +; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at +; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit +; CHECK: loop +; CHECK: br i1 true, label %in.bounds +; CHECK: postloop: + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %idx.offset = add i32 %idx, 13 + %abc = icmp slt i32 %idx.offset, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp slt i32 %idx.next, 101 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Signed latch, signed RC, negative offset. Same as test_02. +define void @test_06(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK-LABEL: test_06( +; CHECK: entry: +; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647 +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13 +; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13 +; CHECK-NEXT: [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 +; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102 +; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]] +; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0 +; CHECK-NEXT: br i1 true, label %loop.preloop.preheader +; CHECK: in.bounds.preloop: +; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop +; CHECK-NEXT: store i32 0, i32* %addr.preloop +; CHECK-NEXT: %next.preloop = icmp slt i32 %idx.next.preloop, 101 +; CHECK-NEXT: [[PRELOOP_COND:%[^ ]+]] = icmp slt i32 %idx.next.preloop, 13 +; CHECK-NEXT: br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector +; CHECK: postloop: + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %idx.offset = sub i32 %idx, 13 + %abc = icmp slt i32 %idx.offset, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp slt i32 %idx.next, 101 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Unsigned latch, Unsigned RC, negative offset. Same as test_03. +define void @test_07(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK-LABEL: test_07( +; CHECK-NOT: preloop +; CHECK: entry: +; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len +; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14 +; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14 +; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]] +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102 +; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102 +; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] +; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at +; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit +; CHECK: loop +; CHECK: br i1 true, label %in.bounds +; CHECK: postloop: + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %idx.offset = add i32 %idx, 13 + %abc = icmp ult i32 %idx.offset, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx.next, 101 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Unsigned latch, Unsigned RC, negative offset. Same as test_04. +define void @test_08(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK-LABEL: test_08( +; CHECK: entry: +; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -14, %len +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102 +; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 +; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] +; CHECK-NEXT: br i1 true, label %loop.preloop.preheader +; CHECK: in.bounds.preloop: +; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop +; CHECK-NEXT: store i32 0, i32* %addr.preloop +; CHECK-NEXT: %next.preloop = icmp ult i32 %idx.next.preloop, 101 +; CHECK-NEXT: [[PRELOOP_COND:%[^ ]+]] = icmp ult i32 %idx.next.preloop, 13 +; CHECK-NEXT: br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector +; CHECK: postloop: + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %idx.offset = sub i32 %idx, 13 + %abc = icmp ult i32 %idx.offset, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx.next, 101 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +!0 = !{i32 0, i32 2147483647} Index: test/Transforms/IRCE/single-access-with-preloop.ll =================================================================== --- test/Transforms/IRCE/single-access-with-preloop.ll +++ test/Transforms/IRCE/single-access-with-preloop.ll @@ -30,7 +30,10 @@ ; CHECK-LABEL: @single_access_with_preloop( ; CHECK: loop.preheader: -; CHECK: [[not_safe_start:[^ ]+]] = add i32 %offset, -1 +; CHECK: [[check_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, -2147483647 +; CHECK: [[safe_offset_preloop:[^ ]+]] = select i1 [[check_min_sint_offset]], i32 %offset, i32 -2147483647 +; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. +; CHECK: [[not_safe_start:[^ ]+]] = add i32 [[safe_offset_preloop]], -1 ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n ; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]] ; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], i32 [[not_safe_start]], i32 [[not_n]] @@ -39,11 +42,17 @@ ; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0 -; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 %offset, -1 +; CHECK: [[len_minus_sint_max:[^ ]+]] = add i32 %len, -2147483647 +; CHECK: [[check_len_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, [[len_minus_sint_max]] +; CHECK: [[safe_offset_mainloop:[^ ]+]] = select i1 [[check_len_min_sint_offset]], i32 %offset, i32 [[len_minus_sint_max]] +; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 [[safe_offset_mainloop]], -1 +; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. ; CHECK: [[not_safe_upper_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len ; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_upper_end]], [[not_n]] ; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_upper_end]], i32 [[not_n]] -; CHECK: [[not_safe_lower_end:[^ ]+]] = add i32 %offset, -2147483648 +; CHECK: [[check_offset_mainloop_2:[^ ]+]] = icmp sgt i32 %offset, 0 +; CHECK: [[safe_offset_mainloop_2:[^ ]+]] = select i1 [[check_offset_mainloop_2]], i32 %offset, i32 0 +; CHECK: [[not_safe_lower_end:[^ ]+]] = add i32 [[safe_offset_mainloop_2]], -2147483648 ; CHECK: [[not_exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp sgt i32 [[not_exit_mainloop_at_loclamp]], [[not_safe_lower_end]] ; CHECK: [[not_exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_hiclamp]], i32 [[not_exit_mainloop_at_loclamp]], i32 [[not_safe_lower_end]] ; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_hiclamp]] @@ -57,7 +66,7 @@ ; CHECK: %abc.high = icmp slt i32 %array.idx, %len ; CHECK: %abc.low = icmp sge i32 %array.idx, 0 ; CHECK: %abc = and i1 true, true -; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit8 +; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit11 ; CHECK: in.bounds: ; CHECK: [[continue_mainloop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[exit_mainloop_at]]