Index: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -81,6 +81,11 @@ clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible"))); +static cl::opt UsePostIncrementRanges( + "indvars-post-increment-ranges", cl::Hidden, + cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), + cl::init(true)); + namespace { struct RewritePhi; @@ -903,6 +908,33 @@ // Value: the kind of extension used to widen this Instruction. DenseMap, ExtendKind> ExtendKindMap; + typedef std::pair, AssertingVH> DefUserPair; + // A map with control-dependent ranges for post increment IV uses. The key is + // a pair of IV def and a use of this def denoting the context. The value is + // a ConstantRange representing possible values of the def at the given + // context. + DenseMap PostIncRangeInfos; + + Optional getPostIncRangeInfo(Value *Def, + Instruction *UseI) { + DefUserPair Key(Def, UseI); + auto It = PostIncRangeInfos.find(Key); + return It == PostIncRangeInfos.end() + ? Optional(None) + : Optional(It->second); + } + + void calculatePostIncRanges(PHINode *OrigPhi); + void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); + void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { + DefUserPair Key(Def, UseI); + auto It = PostIncRangeInfos.find(Key); + if (It == PostIncRangeInfos.end()) + PostIncRangeInfos.insert({Key, R}); + else + It->second = R.intersectWith(It->second); + } + public: WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree, @@ -1429,7 +1461,7 @@ /// void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); - bool NeverNegative = + bool NonNegativeDef = SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, SE->getConstant(NarrowSCEV->getType(), 0)); for (User *U : NarrowDef->users()) { @@ -1439,7 +1471,15 @@ if (!Widened.insert(NarrowUser).second) continue; - NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, NeverNegative); + bool NonNegativeUse = false; + if (!NonNegativeDef) { + // We might have a control-dependent range information for this context. + if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) + NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); + } + + NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, + NonNegativeDef || NonNegativeUse); } } @@ -1479,6 +1519,19 @@ SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && "Loop header phi recurrence inputs do not dominate the loop"); + // Iterate over IV uses (including transitive ones) looking for IV increments + // of the form 'add nsw %iv, '. For each increment and each use of + // the increment calculate control-dependent range information basing on + // dominating conditions inside of the loop (e.g. a range check inside of the + // loop). Calculated ranges are stored in PostIncRangeInfos map. + // + // Control-dependent range information is later used to prove that a narrow + // definition is not negative (see pushNarrowIVUsers). It's difficult to do + // this on demand because when pushNarrowIVUsers needs this information some + // of the dominating conditions might be already widened. + if (UsePostIncrementRanges) + calculatePostIncRanges(OrigPhi); + // The rewriter provides a value for the desired IV expression. This may // either find an existing phi or materialize a new one. Either way, we // expect a well-formed cyclic phi-with-increments. i.e. any operand not part @@ -1523,6 +1576,99 @@ return WidePhi; } +/// Calculates control-dependent range for the given def at the given context +/// by looking at dominating conditions inside of the loop +void WidenIV::calculatePostIncRange(Instruction *NarrowDef, + Instruction *NarrowUser) { + using namespace llvm::PatternMatch; + + Value *NarrowDefLHS; + const APInt *NarrowDefRHS; + if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), + m_APInt(NarrowDefRHS))) || + !NarrowDefRHS->isNonNegative()) + return; + + auto UpdateRangeFromCondition = [&] (Value *Condition, + bool TrueDest) { + CmpInst::Predicate Pred; + Value *CmpRHS; + if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), + m_Value(CmpRHS)))) + return; + + CmpInst::Predicate P = + TrueDest ? Pred : CmpInst::getInversePredicate(Pred); + + auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); + auto CmpConstrainedLHSRange = + ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); + auto NarrowDefRange = + CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS); + + updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); + }; + + BasicBlock *NarrowUserBB = NarrowUser->getParent(); + // If NarrowUserBB is statically unreachable asking dominator queries may + // yield suprising results. (e.g. the block may not have a dom tree node) + if (!DT->isReachableFromEntry(NarrowUserBB)) + return; + + for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); + L->contains(DTB->getBlock()); + DTB = DTB->getIDom()) { + auto *BB = DTB->getBlock(); + auto *TI = BB->getTerminator(); + + auto *BI = dyn_cast(TI); + if (!BI || !BI->isConditional()) + continue; + + auto *TrueSuccessor = BI->getSuccessor(0); + auto *FalseSuccessor = BI->getSuccessor(1); + + auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { + return BBE.isSingleEdge() && + DT->dominates(BBE, NarrowUser->getParent()); + }; + + if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) + UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); + + if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) + UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); + } +} + +/// Calculates PostIncRangeInfos map for the given IV +void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { + SmallPtrSet Visited; + SmallVector Worklist; + Worklist.push_back(OrigPhi); + Visited.insert(OrigPhi); + + while (!Worklist.empty()) { + Instruction *NarrowDef = Worklist.pop_back_val(); + + for (Use &U : NarrowDef->uses()) { + auto *NarrowUser = cast(U.getUser()); + + // Don't go looking outside the current loop. + auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; + if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) + continue; + + if (!Visited.insert(NarrowUser).second) + continue; + + Worklist.push_back(NarrowUser); + + calculatePostIncRange(NarrowDef, NarrowUser); + } + } +} + //===----------------------------------------------------------------------===// // Live IV Reduction - Minimize IVs live across the loop. //===----------------------------------------------------------------------===// Index: llvm/trunk/test/Transforms/IndVarSimplify/post-inc-range.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/post-inc-range.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/post-inc-range.ll @@ -0,0 +1,175 @@ +; RUN: opt < %s -indvars -indvars-post-increment-ranges -S | FileCheck %s + +target datalayout = "p:64:64:64-n32:64" + +; When the IV in this loop is widened we want to widen this use as well: +; icmp slt i32 %i.inc, %limit +; In order to do this indvars need to prove that the narrow IV def (%i.inc) +; is not-negative from the range check inside of the loop. +define void @test(i32* %base, i32 %limit, i32 %start) { +; CHECK-LABEL: @test( +; CHECK-NOT: trunc + +for.body.lr.ph: + br label %for.body + +for.body: + %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ] + %within_limits = icmp ult i32 %i, 64 + br i1 %within_limits, label %continue, label %for.end + +continue: + %i.i64 = zext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64 + %val = load i32, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: + %i.inc = add nsw nuw i32 %i, 1 + %cmp = icmp slt i32 %i.inc, %limit + br i1 %cmp, label %for.body, label %for.end + +for.end: + br label %exit + +exit: + ret void +} + +define void @test_false_edge(i32* %base, i32 %limit, i32 %start) { +; CHECK-LABEL: @test_false_edge( +; CHECK-NOT: trunc + +for.body.lr.ph: + br label %for.body + +for.body: + %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ] + %out_of_bounds = icmp ugt i32 %i, 64 + br i1 %out_of_bounds, label %for.end, label %continue + +continue: + %i.i64 = zext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64 + %val = load i32, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: + %i.inc = add nsw nuw i32 %i, 1 + %cmp = icmp slt i32 %i.inc, %limit + br i1 %cmp, label %for.body, label %for.end + +for.end: + br label %exit + +exit: + ret void +} + +define void @test_range_metadata(i32* %array_length_ptr, i32* %base, + i32 %limit, i32 %start) { +; CHECK-LABEL: @test_range_metadata( +; CHECK-NOT: trunc + +for.body.lr.ph: + br label %for.body + +for.body: + %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ] + %array_length = load i32, i32* %array_length_ptr, !range !{i32 0, i32 64 } + %within_limits = icmp ult i32 %i, %array_length + br i1 %within_limits, label %continue, label %for.end + +continue: + %i.i64 = zext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64 + %val = load i32, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: + %i.inc = add nsw nuw i32 %i, 1 + %cmp = icmp slt i32 %i.inc, %limit + br i1 %cmp, label %for.body, label %for.end + +for.end: + br label %exit + +exit: + ret void +} + +; Negative version of the test above, we don't know anything about +; array_length_ptr range. +define void @test_neg(i32* %array_length_ptr, i32* %base, + i32 %limit, i32 %start) { +; CHECK-LABEL: @test_neg( +; CHECK: trunc i64 + +for.body.lr.ph: + br label %for.body + +for.body: + %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ] + %array_length = load i32, i32* %array_length_ptr + %within_limits = icmp ult i32 %i, %array_length + br i1 %within_limits, label %continue, label %for.end + +continue: + %i.i64 = zext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64 + %val = load i32, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: + %i.inc = add nsw nuw i32 %i, 1 + %cmp = icmp slt i32 %i.inc, %limit + br i1 %cmp, label %for.body, label %for.end + +for.end: + br label %exit + +exit: + ret void +} + +define void @test_transitive_use(i32* %base, i32 %limit, i32 %start) { +; CHECK-LABEL: @test_transitive_use( +; CHECK-NOT: trunc +; CHECK: %result = icmp slt i64 + +for.body.lr.ph: + br label %for.body + +for.body: + %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ] + %within_limits = icmp ult i32 %i, 64 + br i1 %within_limits, label %continue, label %for.end + +continue: + %i.mul.3 = mul nsw nuw i32 %i, 3 + %mul_within = icmp ult i32 %i.mul.3, 64 + br i1 %mul_within, label %guarded, label %continue.2 + +guarded: + %i.mul.3.inc = add nsw nuw i32 %i.mul.3, 1 + %result = icmp slt i32 %i.mul.3.inc, %limit + br i1 %result, label %continue.2, label %for.end + +continue.2: + %i.i64 = zext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64 + %val = load i32, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: + %i.inc = add nsw nuw i32 %i, 1 + %cmp = icmp slt i32 %i.inc, %limit + br i1 %cmp, label %for.body, label %for.end + + +for.end: + br label %exit + +exit: + ret void +}