Index: lib/Transforms/Scalar/IndVarSimplify.cpp
===================================================================
--- lib/Transforms/Scalar/IndVarSimplify.cpp
+++ lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -887,6 +887,9 @@
   ScalarEvolution *SE;
   DominatorTree *DT;
 
+  // Whether to scan for llvm.experimental.guard calls when refining ranges.
+  bool HasGuards;
+
   // Result
   PHINode *WidePhi;
   Instruction *WideInc;
@@ -936,13 +939,14 @@
 public:
   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
           ScalarEvolution *SEv, DominatorTree *DTree,
-          SmallVectorImpl<WeakVH> &DI) :
+          SmallVectorImpl<WeakVH> &DI, bool HasGuards) :
     OrigPhi(WI.NarrowIV),
     WideType(WI.WidestNativeType),
     LI(LInfo),
     L(LI->getLoopFor(OrigPhi->getParent())),
     SE(SEv),
     DT(DTree),
+    HasGuards(HasGuards),
     WidePhi(nullptr),
     WideInc(nullptr),
     WideIncExpr(nullptr),
@@ -1584,7 +1588,7 @@
       !NarrowDefRHS->isNonNegative())
     return;
 
-  auto UpdateRangeFromCondition = [&] (Value *Condition) {
+  auto UpdateRangeFromCondition = [&](Value *Condition) {
     CmpInst::Predicate P;
     Value *CmpRHS;
     if (!match(Condition, m_ICmp(P, m_Specific(NarrowDefLHS),
@@ -1600,11 +1604,30 @@
     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
   };
 
+  // Walk backwards from Ctx to the start of its block and feed the condition
+  // of every guard call found on the way to UpdateRangeFromCondition.
+  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
+    if (!HasGuards)
+      return;
+
+    for (BasicBlock::reverse_iterator I = Ctx->getIterator().getReverse(),
+         E = Ctx->getParent()->rend(); I != E; ++I) {
+      Value *Cond = nullptr;
+      if (!match(&*I, m_Intrinsic<Intrinsic::experimental_guard>(
+                          m_Value(Cond))))
+        continue;
+      UpdateRangeFromCondition(Cond);
+    }
+  };
+
+  UpdateRangeFromGuards(NarrowUser);
+
   BasicBlock *NarrowUserBB = NarrowUser->getParent();
   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
        L->contains(DTB->getBlock());
        DTB = DTB->getIDom()) {
     auto TI = DTB->getBlock()->getTerminator();
+    UpdateRangeFromGuards(TI);
 
     auto *BI = dyn_cast<BranchInst>(TI);
     if (!BI || !BI->isConditional())
@@ -1688,6 +1711,12 @@
                                        LoopInfo *LI) {
   SmallVector<WideIVInfo, 8> WideIVs;
 
+  // Guards can only matter if the intrinsic is declared in this module and has
+  // at least one use.
+  auto *GuardDecl = L->getHeader()->getModule()->getFunction(
+      Intrinsic::getName(Intrinsic::experimental_guard));
+  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
+
   SmallVector<PHINode*, 8> LoopPhis;
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
     LoopPhis.push_back(cast<PHINode>(I));
@@ -1717,7 +1746,7 @@
     } while(!LoopPhis.empty());
 
     for (; !WideIVs.empty(); WideIVs.pop_back()) {
-      WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
+      WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
         Changed = true;
         LoopPhis.push_back(WidePhi);
Index: test/Transforms/IndVarSimplify/post-inc-range.ll
===================================================================
--- test/Transforms/IndVarSimplify/post-inc-range.ll
+++ test/Transforms/IndVarSimplify/post-inc-range.ll
@@ -101,3 +101,65 @@
 exit:
   ret void
 }
+
+declare void @llvm.experimental.guard(i1, ...)
+
+; The guard precedes the post-increment compare in the same block (for.inc).
+define void @test_guard_in_the_same_bb(i32* %base, i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_guard_in_the_same_bb(
+; CHECK-NOT: trunc
+; CHECK-NOT: icmp slt i32
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
+  %within_limits = icmp ult i32 %i, 64
+  %i.i64 = zext i32 %i to i64
+  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+  %val = load i32, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  call void(i1, ...) @llvm.experimental.guard(i1 %within_limits) [ "deopt"() ]
+  %i.inc = add nsw nuw i32 %i, 1
+  %cmp = icmp slt i32 %i.inc, %limit
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  br label %exit
+
+exit:
+  ret void
+}
+
+; The guard is in for.body, the immediate dominator of for.inc.
+define void @test_guard_in_idom(i32* %base, i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_guard_in_idom(
+; CHECK-NOT: trunc
+; CHECK-NOT: icmp slt i32
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
+  %within_limits = icmp ult i32 %i, 64
+  call void(i1, ...) @llvm.experimental.guard(i1 %within_limits) [ "deopt"() ]
+  %i.i64 = zext i32 %i to i64
+  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+  %val = load i32, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  %i.inc = add nsw nuw i32 %i, 1
+  %cmp = icmp slt i32 %i.inc, %limit
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  br label %exit
+
+exit:
+  ret void
+}