Index: lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -805,6 +805,25 @@ ConstantInt *One = ConstantInt::get(IndVarTy, 1); // TODO: generalize the predicates here to also match their unsigned variants. if (IsIncreasing) { + bool DecreasedRightValueByOne = false; + // Try to turn eq/ne predicates to those we can work with. + if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) + // while (++i != len) { while (++i < len) { + // ... ---> ... + // } } + Pred = ICmpInst::ICMP_SLT; + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && + !CanBeSMin(SE, RightSCEV)) { + // while (true) { while (true) { + // if (++i == len) ---> if (++i > len - 1) + // break; break; + // ... ... + // } } + Pred = ICmpInst::ICMP_SGT; + RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); + DecreasedRightValueByOne = true; + } + bool FoundExpectedPred = (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 1) || (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 0); @@ -829,16 +848,41 @@ return None; } - IRBuilder<> B(Preheader->getTerminator()); - RightValue = B.CreateAdd(RightValue, One); + // We need to increase the right value unless we have already decreased + // it virtually when we replaced EQ with SGT. + if (!DecreasedRightValueByOne) { + IRBuilder<> B(Preheader->getTerminator()); + RightValue = B.CreateAdd(RightValue, One); + } } else { if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SLT, IndVarStart, RightSCEV)) { FailureReason = "Induction variable start not bounded by upper limit"; return None; } + assert(!DecreasedRightValueByOne && + "Right value can be decreased only for LatchBrExitIdx == 0!"); } } else { + bool IncreasedRightValueByOne = false; + // Try to turn eq/ne predicates to those we can work with. + if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) + // while (--i != len) { while (--i > len) { + // ... ---> ... + // } } + Pred = ICmpInst::ICMP_SGT; + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && + !CanBeSMax(SE, RightSCEV)) { + // while (true) { while (true) { + // if (--i == len) ---> if (--i < len + 1) + // break; break; + // ... ... + // } } + Pred = ICmpInst::ICMP_SLT; + RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); + IncreasedRightValueByOne = true; + } + bool FoundExpectedPred = (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) || (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 0); @@ -863,14 +907,20 @@ return None; } - IRBuilder<> B(Preheader->getTerminator()); - RightValue = B.CreateSub(RightValue, One); + // We need to decrease the right value unless we have already increased + // it virtually when we replaced EQ with SLT. + if (!IncreasedRightValueByOne) { + IRBuilder<> B(Preheader->getTerminator()); + RightValue = B.CreateSub(RightValue, One); + } } else { if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SGT, IndVarStart, RightSCEV)) { FailureReason = "Induction variable start not bounded by lower limit"; return None; } + assert(!IncreasedRightValueByOne && + "Right value can be increased only for LatchBrExitIdx == 0!"); } } Index: test/Transforms/IRCE/eq_ne.ll =================================================================== --- test/Transforms/IRCE/eq_ne.ll +++ test/Transforms/IRCE/eq_ne.ll @@ -0,0 +1,257 @@ +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s + +; CHECK: irce: in function test_01: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK-NOT: irce: in function test_02: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_03: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK-NOT: irce: in function test_04: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_05: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK-NOT: irce: in function test_06: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_07: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK-NOT: irce: in function test_08: constrained Loop at depth 1 containing: %loop
,%in.bounds + +; Show that IRCE can turn 'ne' condition to 'slt' in increasing IV. +define void @test_01(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_01 +; CHECK: main.exit.selector: +; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ] +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp slt i32 [[PSEUDO_PHI]], 100 +; CHECK-NEXT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ne i32 %idx.next, 100 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Show that if n is not known to be greater than the starting value, IRCE +; doesn't apply. +define void @test_02(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_02( + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ne i32 %idx.next, -100 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Show that IRCE can turn 'eq' condition to 'sge' in increasing IV. +define void @test_03(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_03( +; CHECK: main.exit.selector: +; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ] +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp slt i32 [[PSEUDO_PHI]], 100 +; CHECK-NEXT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp eq i32 %idx.next, 100 + br i1 %next, label %exit, label %loop + +out.of.bounds: + ret void + +exit: + ret void +} + +; Show that if n is not known to be greater than the starting value, IRCE +; doesn't apply. +define void @test_04(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_04( + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp eq i32 %idx.next, -100 + br i1 %next, label %exit, label %loop + +out.of.bounds: + ret void + +exit: + ret void +} + +; Show that IRCE can turn 'ne' condition to 'sgt' in decreasing IV. +define void @test_05(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_05( +; CHECK: preloop.exit.selector: +; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next.preloop, %in.bounds.preloop ] +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp sgt i32 [[PSEUDO_PHI]], 0 +; CHECK-NEXT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 100, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, -1 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ne i32 %idx.next, 0 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Show that IRCE cannot turn 'ne' condition to 'sgt' in decreasing IV if the end +; value is not proved to be less than the start value. +define void @test_06(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_06( + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 100, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, -1 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ne i32 %idx.next, 120 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Show that IRCE can turn 'eq' condition to 'slt' in decreasing IV. +define void @test_07(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_07( +; CHECK: preloop.exit.selector: +; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next.preloop, %in.bounds.preloop ] +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp sgt i32 [[PSEUDO_PHI]], 0 +; CHECK-NEXT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 100, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, -1 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp eq i32 %idx.next, 0 + br i1 %next, label %exit, label %loop + +out.of.bounds: + ret void + +exit: + ret void +} + +; Show that IRCE cannot turn 'eq' condition to 'slt' in decreasing IV if the end +; value is not proved to be less than the start value. +define void @test_08(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_08( + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 100, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, -1 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp eq i32 %idx.next, 120 + br i1 %next, label %exit, label %loop + +out.of.bounds: + ret void + +exit: + ret void +} + +!0 = !{i32 0, i32 50} +!1 = !{!"branch_weights", i32 64, i32 4}