Index: lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -452,21 +452,22 @@ // intN_ty inc = IndVarIncreasing ? 1 : -1; // pred_ty predicate = IndVarIncreasing ? ICMP_SLT : ICMP_SGT; // - // for (intN_ty iv = IndVarStart; predicate(iv, LoopExitAt); iv = IndVarNext) + // for (intN_ty iv = IndVarStart; predicate(iv, LoopExitAt); iv = IndVarBase) // ... body ... - Value *IndVarNext; + Value *IndVarBase; Value *IndVarStart; Value *IndVarStep; Value *LoopExitAt; bool IndVarIncreasing; bool IsSignedPredicate; + bool IsIndVarNext; LoopStructure() : Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr), - LatchExit(nullptr), LatchBrExitIdx(-1), IndVarNext(nullptr), + LatchExit(nullptr), LatchBrExitIdx(-1), IndVarBase(nullptr), IndVarStart(nullptr), IndVarStep(nullptr), LoopExitAt(nullptr), - IndVarIncreasing(false), IsSignedPredicate(true) {} + IndVarIncreasing(false), IsSignedPredicate(true), IsIndVarNext(false) {} template LoopStructure map(M Map) const { LoopStructure Result; @@ -476,12 +477,13 @@ Result.LatchBr = cast(Map(LatchBr)); Result.LatchExit = cast(Map(LatchExit)); Result.LatchBrExitIdx = LatchBrExitIdx; - Result.IndVarNext = Map(IndVarNext); + Result.IndVarBase = Map(IndVarBase); Result.IndVarStart = Map(IndVarStart); Result.IndVarStep = Map(IndVarStep); Result.LoopExitAt = Map(LoopExitAt); Result.IndVarIncreasing = IndVarIncreasing; Result.IsSignedPredicate = IsSignedPredicate; + Result.IsIndVarNext = IsIndVarNext; return Result; } @@ -829,21 +831,31 @@ return false; }; - // `ICI` is interpreted as taking the backedge if the *next* value of the - // induction variable satisfies some constraint. + // `ICI` can either be a comparison against IV or a comparison of IV.next. + // Depending on the interpretation, we calculate the start value differently. - const SCEVAddRecExpr *IndVarNext = cast(LeftSCEV); + const SCEVAddRecExpr *IVRec = cast(LeftSCEV); bool IsIncreasing = false; bool IsSignedPredicate = true; + bool IsIndVarNext = false; ConstantInt *StepCI; - if (!IsInductionVar(IndVarNext, IsIncreasing, StepCI)) { + if (!IsInductionVar(IVRec, IsIncreasing, StepCI)) { FailureReason = "LHS in icmp not induction variable"; return None; } - const SCEV *StartNext = IndVarNext->getStart(); - const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE)); - const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend); + const SCEV *IndVarStart; + if (isa(LeftValue) && + cast(LeftValue)->getParent() == Header) { + // The comparison is made against IV. + IndVarStart = IVRec->getStart(); + } else { + // Assume that the comparison is made against IV.next. + const SCEV *StartNext = IVRec->getStart(); + const SCEV *Addend = SE.getNegativeSCEV(IVRec->getStepRecurrence(SE)); + IndVarStart = SE.getAddExpr(StartNext, Addend); + IsIndVarNext = true; + } const SCEV *Step = SE.getSCEV(StepCI); ConstantInt *One = ConstantInt::get(IndVarTy, 1); @@ -1023,10 +1035,11 @@ Result.LatchBrExitIdx = LatchBrExitIdx; Result.IndVarStart = IndVarStartV; Result.IndVarStep = StepCI; - Result.IndVarNext = LeftValue; + Result.IndVarBase = LeftValue; Result.IndVarIncreasing = IsIncreasing; Result.LoopExitAt = RightValue; Result.IsSignedPredicate = IsSignedPredicate; + Result.IsIndVarNext = IsIndVarNext; FailureReason = nullptr; @@ -1272,12 +1285,12 @@ Value *TakeBackedgeLoopCond = nullptr; if (Increasing) TakeBackedgeLoopCond = IsSignedPredicate - ? B.CreateICmpSLT(LS.IndVarNext, ExitSubloopAt) - : B.CreateICmpULT(LS.IndVarNext, ExitSubloopAt); + ? B.CreateICmpSLT(LS.IndVarBase, ExitSubloopAt) + : B.CreateICmpULT(LS.IndVarBase, ExitSubloopAt); else TakeBackedgeLoopCond = IsSignedPredicate - ? B.CreateICmpSGT(LS.IndVarNext, ExitSubloopAt) - : B.CreateICmpUGT(LS.IndVarNext, ExitSubloopAt); + ? B.CreateICmpSGT(LS.IndVarBase, ExitSubloopAt) + : B.CreateICmpUGT(LS.IndVarBase, ExitSubloopAt); Value *CondForBranch = LS.LatchBrExitIdx == 1 ? TakeBackedgeLoopCond : B.CreateNot(TakeBackedgeLoopCond); @@ -1292,12 +1305,12 @@ Value *IterationsLeft = nullptr; if (Increasing) IterationsLeft = IsSignedPredicate - ? B.CreateICmpSLT(LS.IndVarNext, LS.LoopExitAt) - : B.CreateICmpULT(LS.IndVarNext, LS.LoopExitAt); + ? B.CreateICmpSLT(LS.IndVarBase, LS.LoopExitAt) + : B.CreateICmpULT(LS.IndVarBase, LS.LoopExitAt); else IterationsLeft = IsSignedPredicate - ? B.CreateICmpSGT(LS.IndVarNext, LS.LoopExitAt) - : B.CreateICmpUGT(LS.IndVarNext, LS.LoopExitAt); + ? B.CreateICmpSGT(LS.IndVarBase, LS.LoopExitAt) + : B.CreateICmpUGT(LS.IndVarBase, LS.LoopExitAt); B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit); BranchInst *BranchToContinuation = @@ -1320,10 +1333,10 @@ RRI.PHIValuesAtPseudoExit.push_back(NewPHI); } - RRI.IndVarEnd = PHINode::Create(LS.IndVarNext->getType(), 2, "indvar.end", + RRI.IndVarEnd = PHINode::Create(LS.IndVarBase->getType(), 2, "indvar.end", BranchToContinuation); RRI.IndVarEnd->addIncoming(LS.IndVarStart, Preheader); - RRI.IndVarEnd->addIncoming(LS.IndVarNext, RRI.ExitSelector); + RRI.IndVarEnd->addIncoming(LS.IndVarBase, RRI.ExitSelector); // The latch exit now has a branch from `RRI.ExitSelector' instead of // `LS.Latch'. The PHI nodes need to be updated to reflect that. @@ -1424,7 +1437,7 @@ SubRanges SR = MaybeSR.getValue(); bool Increasing = MainLoopStructure.IndVarIncreasing; IntegerType *IVTy = - cast(MainLoopStructure.IndVarNext->getType()); + cast(MainLoopStructure.IndVarBase->getType()); SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce"); Instruction *InsertPt = OriginalPreheader->getTerminator(); @@ -1699,7 +1712,10 @@ } LoopStructure LS = MaybeLoopStructure.getValue(); const SCEVAddRecExpr *IndVar = - cast(SE.getMinusSCEV(SE.getSCEV(LS.IndVarNext), SE.getSCEV(LS.IndVarStep))); + cast(SE.getSCEV(LS.IndVarBase)); + if (LS.IsIndVarNext) + IndVar = cast(SE.getMinusSCEV(IndVar, + SE.getSCEV(LS.IndVarStep))); Optional SafeIterRange; Instruction *ExprInsertPt = Preheader->getTerminator(); Index: test/Transforms/IRCE/latch-comparison-against-current-value.ll =================================================================== --- /dev/null +++ test/Transforms/IRCE/latch-comparison-against-current-value.ll @@ -0,0 +1,117 @@ +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s + +; Check that IRCE is able to deal with loops where the latch comparison is +; done against current value of the IV, not the IV.next. + +; CHECK: irce: in function test_01: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_02: constrained Loop at depth 1 containing: %loop
,%in.bounds + +; SLT condition for increasing loop from 0 to 100. +define void @test_01(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_01 +; CHECK: entry: +; CHECK-NEXT: %exit.mainloop.at = load i32, i32* %a_len_ptr, !range !0 +; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at +; CHECK-NEXT: br i1 [[COND2]], label %loop.preheader, label %main.pseudo.exit +; CHECK: loop: +; CHECK-NEXT: %idx = phi i32 [ %idx.next, %in.bounds ], [ 0, %loop.preheader ] +; CHECK-NEXT: %idx.next = add nuw nsw i32 %idx, 1 +; CHECK-NEXT: %abc = icmp slt i32 %idx, %exit.mainloop.at +; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1 +; CHECK: in.bounds: +; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx +; CHECK-NEXT: store i32 0, i32* %addr +; CHECK-NEXT: %next = icmp slt i32 %idx, 100 +; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp slt i32 %idx, %exit.mainloop.at +; CHECK-NEXT: br i1 [[COND3]], label %loop, label %main.exit.selector +; CHECK: main.exit.selector: +; CHECK-NEXT: %idx.lcssa = phi i32 [ %idx, %in.bounds ] +; CHECK-NEXT: %idx.next.lcssa = phi i32 [ %idx.next, %in.bounds ] +; CHECK-NEXT: [[COND4:%[^ ]+]] = icmp slt i32 %idx.lcssa, 100 +; CHECK-NEXT: br i1 [[COND4]], label %main.pseudo.exit, label %exit +; CHECK-NOT: loop.preloop: +; CHECK: loop.postloop: +; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ] +; CHECK-NEXT: %idx.next.postloop = add nuw nsw i32 %idx.postloop, 1 +; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %exit.mainloop.at +; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nsw nuw i32 %idx, 1 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp slt i32 %idx, 100 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; ULT condition for increasing loop from 0 to 100. +define void @test_02(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_02 +; CHECK: entry: +; CHECK-NEXT: %exit.mainloop.at = load i32, i32* %a_len_ptr, !range !0 +; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at +; CHECK-NEXT: br i1 [[COND2]], label %loop.preheader, label %main.pseudo.exit +; CHECK: loop: +; CHECK-NEXT: %idx = phi i32 [ %idx.next, %in.bounds ], [ 0, %loop.preheader ] +; CHECK-NEXT: %idx.next = add nuw nsw i32 %idx, 1 +; CHECK-NEXT: %abc = icmp ult i32 %idx, %exit.mainloop.at +; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1 +; CHECK: in.bounds: +; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx +; CHECK-NEXT: store i32 0, i32* %addr +; CHECK-NEXT: %next = icmp ult i32 %idx, 100 +; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp ult i32 %idx, %exit.mainloop.at +; CHECK-NEXT: br i1 [[COND3]], label %loop, label %main.exit.selector +; CHECK: main.exit.selector: +; CHECK-NEXT: %idx.lcssa = phi i32 [ %idx, %in.bounds ] +; CHECK-NEXT: %idx.next.lcssa = phi i32 [ %idx.next, %in.bounds ] +; CHECK-NEXT: [[COND4:%[^ ]+]] = icmp ult i32 %idx.lcssa, 100 +; CHECK-NEXT: br i1 [[COND4]], label %main.pseudo.exit, label %exit +; CHECK-NOT: loop.preloop: +; CHECK: loop.postloop: +; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ] +; CHECK-NEXT: %idx.next.postloop = add nuw nsw i32 %idx.postloop, 1 +; CHECK-NEXT: %abc.postloop = icmp ult i32 %idx.postloop, %exit.mainloop.at +; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nsw nuw i32 %idx, 1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx, 100 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +!0 = !{i32 0, i32 50}