Index: lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -176,7 +176,14 @@ Type *getType() const { return Begin->getType(); } const SCEV *getBegin() const { return Begin; } const SCEV *getEnd() const { return End; } - bool isEmpty() const { return Begin == End; } + bool isEmpty(ScalarEvolution &SE, bool IsSigned) const { + if (Begin == End) + return true; + if (IsSigned) + return SE.isKnownPredicate(ICmpInst::ICMP_SGE, Begin, End); + else + return SE.isKnownPredicate(ICmpInst::ICMP_UGE, Begin, End); + } }; /// This is the value the condition of the branch needs to evaluate to for the @@ -1655,14 +1662,15 @@ IntersectRange(ScalarEvolution &SE, const Optional &R1, const InductiveRangeCheck::Range &R2) { - if (R2.isEmpty()) + if (R2.isEmpty(SE, /* IsSigned */ true)) return None; if (!R1.hasValue()) return R2; auto &R1Value = R1.getValue(); // We never return empty ranges from this function, and R1 is supposed to be // a result of intersection. Thus, R1 is never empty. - assert(!R1Value.isEmpty() && "We should never have empty R1!"); + assert(!R1Value.isEmpty(SE, /* IsSigned */ true) && + "We should never have empty R1!"); // TODO: we could widen the smaller range and have this work; but for now we // bail out to keep things simple. @@ -1674,7 +1682,7 @@ // If the resulting range is empty, just return None. auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd); - if (Ret.isEmpty()) + if (Ret.isEmpty(SE, /* IsSigned */ true)) return None; return Ret; } @@ -1745,8 +1753,9 @@ auto MaybeSafeIterRange = IntersectRange(SE, SafeIterRange, Result.getValue()); if (MaybeSafeIterRange.hasValue()) { - assert(!MaybeSafeIterRange.getValue().isEmpty() && - "We should never return empty ranges!"); + assert( + !MaybeSafeIterRange.getValue().isEmpty(SE, LS.IsSignedPredicate) && + "We should never return empty ranges!"); RangeChecksToEliminate.push_back(IRC); SafeIterRange = MaybeSafeIterRange.getValue(); } Index: test/Transforms/IRCE/clamp.ll =================================================================== --- test/Transforms/IRCE/clamp.ll +++ test/Transforms/IRCE/clamp.ll @@ -1,3 +1,7 @@ +; This test demonstrates the confusion in ranges: we have unsigned ranges here, +; but signed comparisons in IntersectRanges produce bad results. We temporarily +; disable it and re-enable once the unsigned ranges are supported again. +; XFAIL: * ; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -irce-allow-unsigned-latch=true -S < %s 2>&1 | FileCheck %s ; The test demonstrates that incorrect behavior of Clamp may lead to incorrect Index: test/Transforms/IRCE/empty_ranges.ll =================================================================== --- /dev/null +++ test/Transforms/IRCE/empty_ranges.ll @@ -0,0 +1,69 @@ +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -irce-allow-unsigned-latch=true -S + + +; Make sure that IRCE doesn't apply in case of empty ranges. +; (i + 30 < 40) if i in [-30, 10). +; Intersected with iteration space, it is [0, 10). +; (i - 60 < 40) if i in [60 , 100). +; The intersection with safe iteration space is the empty range [60, 10). +; It is better to eliminate one range check than attempt to eliminate both given +; that we will never go to the main loop in the latter case and basically +; only duplicate code with no benefits. + +define void @test_01(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK-LABEL: test_01( +; CHECK-NOT: preloop +; CHECK: entry: +; CHECK-NEXT: br i1 true, label %loop.preheader, label %main.pseudo.exit +; CHECK: in.bounds.1: +; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx +; CHECK-NEXT: store i32 0, i32* %addr +; CHECK-NEXT: %off1 = add i32 %idx, 30 +; CHECK-NEXT: %c2 = icmp slt i32 %off1, 40 +; CHECK-NEXT: br i1 true, label %in.bounds.2, label %exit.loopexit2 +; CHECK: in.bounds.2: +; CHECK-NEXT: %off2 = add i32 %idx, -60 +; CHECK-NEXT: %c3 = icmp slt i32 %off2, 40 +; CHECK-NEXT: br i1 %c3, label %in.bounds.3, label %exit.loopexit2 +; CHECK: in.bounds.3: +; CHECK-NEXT: %next = icmp ult i32 %idx.next, 100 +; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ult i32 %idx.next, 10 +; CHECK-NEXT: br i1 [[COND1]], label %loop, label %main.exit.selector +; CHECK: main.exit.selector: +; CHECK-NEXT: %idx.next.lcssa = phi i32 [ %idx.next, %in.bounds.3 ] +; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ult i32 %idx.next.lcssa, 100 +; CHECK-NEXT: br i1 [[COND2]], label %main.pseudo.exit, label %exit +; CHECK: postloop: + +entry: + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds.3 ] + %idx.next = add nsw nuw i32 %idx, 1 + %c1 = icmp slt i32 %idx, 20 + br i1 %c1, label %in.bounds.1, label %out.of.bounds + +in.bounds.1: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %off1 = add i32 %idx, 30 + %c2 = icmp slt i32 %off1, 40 + br i1 %c2, label %in.bounds.2, label %exit + +in.bounds.2: + %off2 = add i32 %idx, -60 + %c3 = icmp slt i32 %off2, 40 + br i1 %c3, label %in.bounds.3, label %exit + +in.bounds.3: + %next = icmp ult i32 %idx.next, 100 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +}