Index: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -892,6 +892,10 @@ ScalarEvolution *SE; DominatorTree *DT; + // Does the module have any calls to the llvm.experimental.guard intrinsic + // at all? If not we can avoid scanning instructions looking for guards. + bool HasGuards; + // Result PHINode *WidePhi; Instruction *WideInc; @@ -938,13 +942,14 @@ public: WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree, - SmallVectorImpl &DI) : + SmallVectorImpl &DI, bool HasGuards) : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), + HasGuards(HasGuards), WidePhi(nullptr), WideInc(nullptr), WideIncExpr(nullptr), @@ -1609,6 +1614,20 @@ updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); }; + auto UpdateRangeFromGuards = [&](Instruction *Ctx) { + if (!HasGuards) + return; + + for (Instruction &I : make_range(Ctx->getIterator().getReverse(), + Ctx->getParent()->rend())) { + Value *C = nullptr; + if (match(&I, m_Intrinsic(m_Value(C)))) + UpdateRangeFromCondition(C, /*TrueDest=*/true); + } + }; + + UpdateRangeFromGuards(NarrowUser); + BasicBlock *NarrowUserBB = NarrowUser->getParent(); // If NarrowUserBB is statically unreachable asking dominator queries may // yield suprising results. (e.g. the block may not have a dom tree node) @@ -1620,6 +1639,7 @@ DTB = DTB->getIDom()) { auto *BB = DTB->getBlock(); auto *TI = BB->getTerminator(); + UpdateRangeFromGuards(TI); auto *BI = dyn_cast(TI); if (!BI || !BI->isConditional()) @@ -1711,6 +1731,10 @@ LoopInfo *LI) { SmallVector WideIVs; + auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( + Intrinsic::getName(Intrinsic::experimental_guard)); + bool HasGuards = GuardDecl && !GuardDecl->use_empty(); + SmallVector LoopPhis; for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { LoopPhis.push_back(cast(I)); @@ -1740,7 +1764,7 @@ } while(!LoopPhis.empty()); for (; !WideIVs.empty(); WideIVs.pop_back()) { - WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts); + WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards); if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { Changed = true; LoopPhis.push_back(WidePhi); Index: llvm/trunk/test/Transforms/IndVarSimplify/post-inc-range.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/post-inc-range.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/post-inc-range.ll @@ -173,3 +173,117 @@ exit: ret void } + +declare void @llvm.experimental.guard(i1, ...) + +define void @test_guard_one_bb(i32* %base, i32 %limit, i32 %start) { +; CHECK-LABEL: @test_guard_one_bb( +; CHECK-NOT: trunc +; CHECK-NOT: icmp slt i32 + +for.body.lr.ph: + br label %for.body + +for.body: + %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.body ] + %within_limits = icmp ult i32 %i, 64 + %i.i64 = zext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64 + %val = load i32, i32* %arrayidx, align 4 + call void(i1, ...) @llvm.experimental.guard(i1 %within_limits) [ "deopt"() ] + %i.inc = add nsw nuw i32 %i, 1 + %cmp = icmp slt i32 %i.inc, %limit + br i1 %cmp, label %for.body, label %for.end + +for.end: + br label %exit + +exit: + ret void +} + +define void @test_guard_in_the_same_bb(i32* %base, i32 %limit, i32 %start) { +; CHECK-LABEL: @test_guard_in_the_same_bb( +; CHECK-NOT: trunc +; CHECK-NOT: icmp slt i32 + +for.body.lr.ph: + br label %for.body + +for.body: + %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ] + %within_limits = icmp ult i32 %i, 64 + %i.i64 = zext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64 + %val = load i32, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: + call void(i1, ...) @llvm.experimental.guard(i1 %within_limits) [ "deopt"() ] + %i.inc = add nsw nuw i32 %i, 1 + %cmp = icmp slt i32 %i.inc, %limit + br i1 %cmp, label %for.body, label %for.end + +for.end: + br label %exit + +exit: + ret void +} + +define void @test_guard_in_idom(i32* %base, i32 %limit, i32 %start) { +; CHECK-LABEL: @test_guard_in_idom( +; CHECK-NOT: trunc +; CHECK-NOT: icmp slt i32 + +for.body.lr.ph: + br label %for.body + +for.body: + %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ] + %within_limits = icmp ult i32 %i, 64 + call void(i1, ...) @llvm.experimental.guard(i1 %within_limits) [ "deopt"() ] + %i.i64 = zext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64 + %val = load i32, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: + %i.inc = add nsw nuw i32 %i, 1 + %cmp = icmp slt i32 %i.inc, %limit + br i1 %cmp, label %for.body, label %for.end + +for.end: + br label %exit + +exit: + ret void +} + +define void @test_guard_merge_ranges(i32* %base, i32 %limit, i32 %start) { +; CHECK-LABEL: @test_guard_merge_ranges( +; CHECK-NOT: trunc +; CHECK-NOT: icmp slt i32 + +for.body.lr.ph: + br label %for.body + +for.body: + %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.body ] + %within_limits.1 = icmp ult i32 %i, 64 + call void(i1, ...) @llvm.experimental.guard(i1 %within_limits.1) [ "deopt"() ] + %within_limits.2 = icmp ult i32 %i, 2147483647 + call void(i1, ...) @llvm.experimental.guard(i1 %within_limits.2) [ "deopt"() ] + %i.i64 = zext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64 + %val = load i32, i32* %arrayidx, align 4 + %i.inc = add nsw nuw i32 %i, 1 + %cmp = icmp slt i32 %i.inc, %limit + br i1 %cmp, label %for.body, label %for.end + +for.end: + br label %exit + +exit: + ret void +}